VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 99289

Last change on this file since 99289 was 99288, checked in by vboxsync, 2 years ago

VMM/IEM: More work on processing MC blocks and generating threaded functions from them. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 252.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 99288 2023-04-04 23:55:49Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 99288 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## Constants and values for EFLAGS.
## Maps each X86_EFL_XXX constant name to its numeric value; the trailing
## comment on each entry gives the matching C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
    'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps a flag mnemonic to an X86_EFL_XXX constant name (see g_kdX86EFlagsConstants).
## A '!' in front of the constant name marks the mnemonic that stands for the
## flag being clear (e.g. 'nc' = No Carry).
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): on x86 PF is set for even parity, so 'po' -> PF set and
    #               'pe' -> PF clear looks inverted.  Confirm against the
    #               users of these mnemonics before changing the mapping.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Maps each X86_CR0_XXX constant name to its numeric value; the trailing
## comment on each entry gives the matching C expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
};
152
## Constants and values for CR4.
## Maps each X86_CR4_XXX constant name to its numeric value; the trailing
## comment on each entry gives the matching C expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
};
174
## XSAVE components (XCR0).
## Maps each XSAVE_C_XXX constant name to its numeric value.  The last two
## entries are combined masks rather than single component bits.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87': 0x00000001,
    'XSAVE_C_SSE': 0x00000002,
    'XSAVE_C_YMM': 0x00000004,
    'XSAVE_C_BNDREGS': 0x00000008,
    'XSAVE_C_BNDCSR': 0x00000010,
    'XSAVE_C_OPMASK': 0x00000020,
    'XSAVE_C_ZMM_HI256': 0x00000040,
    'XSAVE_C_ZMM_16HI': 0x00000080,
    'XSAVE_C_PKRU': 0x00000200,
    'XSAVE_C_LWP': 0x4000000000000000,
    'XSAVE_C_X': 0x8000000000000000,
    # Equals XSAVE_C_YMM | XSAVE_C_ZMM_HI256 | XSAVE_C_ZMM_16HI.
    'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
    # Equals XSAVE_C_ALL_AVX | XSAVE_C_SSE.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## Only the keys carry information here; every value is an empty list.
## The keys are the location names referenced by field 1 of the g_kdOpTypes
## entries and by the location lists in g_kdIemForms.
g_kdOpLocations = {
    'reg': [], ## modrm.reg
    'rm': [], ## modrm.rm
    'imm': [], ## immediate instruction data
    'vvvv': [], ## VEX.vvvv

    # fixed registers.
    'AL': [],
    'rAX': [],
    'rDX': [],
    'rSI': [],
    'rDI': [],
    'rFLAGS': [],
    'CS': [],
    'DS': [],
    'ES': [],
    'FS': [],
    'GS': [],
    'SS': [],
};
214
## \@op[1-4] types
##
## Maps an operand type name to a 5-tuple describing it.
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
## - 4: IEM form matching instruction.
##
## Where a key with an access suffix (_RO, _WO, _RW) exists next to the
## unsuffixed key, both carry the same tuple.  The suffix presumably records
## read-only / write-only / read-write access - TODO confirm against users.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
    'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
    'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
    'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
    'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
    'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
    'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),

    # VEX.vvvv
    'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
    'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
    'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
    'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
    'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),

    # The flags register.
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
    'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Maps the form name (the XXX in IEMFORM_XXX) to a 3-tuple.
##
## Value fields:
## - 0: the encoding (a g_kdEncodings key).
## - 1: list with the location of each operand in order (g_kdOpLocations
##      keys), or None for the FIXED form.
## - 2: the sub-opcode ('11 mr/reg', '!11 mr/reg' or an empty string).
##
## NOTE(review): M_REG, M_MEM, VEX_M_REG and VEX_M_MEM have an empty
##               sub-opcode, unlike all the other xxx_REG / xxx_MEM forms.
##               Confirm whether that is intentional.
g_kdIemForms = { # ( sEncoding, [ sWhere1, ... ], sOpcodeSub ),
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
    'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
    'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
    'M': ( 'ModR/M', [ 'rm', ], '', ),
    'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
    'R': ( 'ModR/M', [ 'reg', ], '', ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
    'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
    'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
    'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),

    'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
    'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),

    'FIXED': ( 'fixed', None, '', ),
};
417
## \@oppfx values.
## Only the keys carry information; every value is an empty list.  The same
## four keys are used by InstructionMap.kiPrefixOrder.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
425
## Special \@opcode tag values.
## Only the keys carry information; every value is an empty list.
## In the keys '11' stands for modrm.mod == 0y11 and '!11' for
## modrm.mod != 0y11 (same notation as InstructionMap.kdSelectors).
g_kdSpecialOpcodes = {
    '/reg': [],
    'mr/reg': [],
    '11 /reg': [],
    '!11 /reg': [],
    '11 mr/reg': [],
    '!11 mr/reg': [],
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Entries that are not aliases must name themselves in the first value.
g_kdSubOpcodes = {
    'none': [ None, '', ],
    '11 mr/reg': [ '11 mr/reg', '', ],
    '11': [ '11 mr/reg', '', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', '', ],
    '!11': [ '!11 mr/reg', '', ], ##< alias
    'rex.w=0': [ 'rex.w=0', 'WZ', ],
    'w=0': [ 'rex.w=0', '', ], ##< alias
    'rex.w=1': [ 'rex.w=1', 'WNZ', ],
    'w=1': [ 'rex.w=1', '', ], ##< alias
    'vex.l=0': [ 'vex.l=0', 'L0', ],
    'vex.l=1': [ 'vex.l=1', 'L1', ], # Was 'vex.l=0' (copy & paste), which turned L=1 into L=0.
    '11 mr/reg vex.l=0': [ '11 mr/reg vex.l=0', 'L0', ],
    '11 mr/reg vex.l=1': [ '11 mr/reg vex.l=1', 'L1', ],
    '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0', ],
    '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1', ],
};
456
## Valid values for \@openc
## Each value is a one-element list holding the BS3CG1ENC_XXX constant name
## for the encoding, or None where there is none ('prefix').
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix': [ None, ], ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## Only the keys carry information; every value is an empty list.
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm': [], ##< VEX+ModR/M, everyone.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
## CPU names.
## Only the keys carry information; every value is an empty tuple.
## NOTE(review): presumably the set of valid CPU names for a CPU related
##               tag - confirm against the users of this table.
g_kdCpuNames = {
    '8086': (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
483
## \@opcpuid
## Maps the feature name used with the tag to the name of the corresponding
## X86_CPUID_XXX feature bit constant.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the constant of the neighbouring ECX bit
    # (DTES64, CPLDS and TM2 respectively) instead of their own feature.
    'pclmul': 'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor': 'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to the name of the corresponding DISOPTYPE_XXX
## constant.  Hints without such a constant map to an empty string.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read': 'DISOPTYPE_X86_PORTIO_READ', ##<
    'x86_portio_write': 'DISOPTYPE_X86_PORTIO_WRITE', ##<
    'x86_invalid_64': 'DISOPTYPE_X86_INVALID_64', ##< Invalid in 64 bits mode
    'x86_only_64': 'DISOPTYPE_X86_ONLY_64', ##< Only valid in 64 bits mode
    'x86_default_64_op_size': 'DISOPTYPE_X86_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'x86_forced_64_op_size': 'DISOPTYPE_X86_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg': 'DISOPTYPE_X86_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11': 'DISOPTYPE_X86_MOD_FIXED_11', ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                      ## (only in 16 & 32 bits mode!)
    'x86_avx': 'DISOPTYPE_X86_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse': 'DISOPTYPE_X86_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx': 'DISOPTYPE_X86_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu': 'DISOPTYPE_X86_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed': '', ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys carry information; every value is an empty list.
g_kdXcptTypes = {
    'none': [],
    '1': [],
    '2': [],
    '3': [],
    '4': [],
    '4UA': [],
    '5': [],
    '5LZ': [], # LZ = VEX.L must be zero.
    '6': [],
    '7': [],
    '7LZ': [], # LZ as for 5LZ above.
    '8': [],
    '11': [],
    '12': [],
    'E1': [],
    'E1NF': [],
    'E2': [],
    'E3': [],
    'E3NF': [],
    'E4': [],
    'E4NF': [],
    'E5': [],
    'E5NF': [],
    'E6': [],
    'E6NF': [],
    'E7NF': [],
    'E9': [],
    'E9NF': [],
    'E10': [],
    'E11': [],
    'E12': [],
    'E12NF': [],
};
608
609
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode spells out an opcode byte, i.e. is exactly '0x'
    followed by two lower case hex digits (like '0x0f').
    Returns True if it does, False if not.
    """
    # Reject anything of the wrong length or without the '0x' prefix up front.
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;

    # Upper case digits are deliberately not accepted.
    sHexDigits = '0123456789abcdef';
    return sOpcode[2] in sHexDigits and sOpcode[3] in sHexDigits;
621
622
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  Only the keys matter.
    ## NOTE(review): the 'vvvvv' in the comments below presumably refers to the
    ##               VEX/XOP map select field (mmmmm) - confirm.
    kdEncodings = {
        'legacy': [],
        'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8': [], ##< XOP prefix with vvvvv = 8
        'xop9': [], ##< XOP prefix with vvvvv = 9
        'xop10': [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
        'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
        'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, sIemName is stored as given, asLeadOpcodes is
        the list of lead opcode bytes (each must pass _isValidOpcodeByte),
        sSelector must be a kdSelectors key, sEncoding a kdEncodings key and
        sDisParse, if given, must start with 'IDX_Parse'.

        Invalid input is caught by assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName = sName;
        self.sIemName = sIemName;
        self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector = sSelector; ##< The member selector, see kdSelectors.
        self.sEncoding = sEncoding; ##< The encoding, see kdEncodings.
        self.aoInstructions = [] # type: List[Instruction]
        self.sDisParse = sDisParse; ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table with filtering instruction by sPrefix if not None.

        When filtering, a 'byte+pfx' selector becomes 'byte' in the copy since
        only one prefix remains.  The instruction objects and the lead opcode
        list are shared with the original, not duplicated.
        """
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
                                           else self.sSelector,
                               sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if sPrefixFilter is None:
            oCopy.aoInstructions = list(self.aoInstructions);
        else:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode bytes.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For the ModR/M based selectors the value is
        treated as a ModR/M byte: mod in bits 7:6, reg in bits 5:3 and rm in
        bits 2:0.

        Asserts that the instruction fits the selector.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if self.sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
            return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! The mod checks below must compare against 0xc0.  A value masked
        #       with 0xc0 can never equal 0xc, so comparing with that made the
        #       '11 /r' and '11' assertions always fail and the '!11 /r' one
        #       always pass.
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Those without an opcode are left out.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this entry, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' and 'r' words as well as two digit hex words are appended with the
        underscore intact, all other words get 'grp' and 'map' capitalized,
        the first letter upper cased and are appended without a separator.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm': # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r': # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord = sWord.replace('grp', 'Grp');
                sWord = sWord.replace('map', 'Map');
                # Slicing rather than indexing so an empty word (leading, trailing
                # or doubled underscore) adds nothing instead of raising IndexError.
                sName += sWord[:1].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.

        This is the table name with the 'g_aDisas' prefix replaced by
        'g_Disas' and 'Range' appended.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
815
816
class TestType(object):
    """
    Test value type.

    The base class handles integer like values.  The fUnsigned constructor
    argument selects the default stance on zero vs sign extension; a value
    can request sign extension regardless by carrying a '+' or '-' prefix.
    """

    class BadValue(Exception):
        """ Raised when a value string cannot be parsed. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## Maps each lowercase hex digit to its bitwise inverse (ascii ~ operator).
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.fUnsigned = fUnsigned;
        if acbSizes is None:
            acbSizes = [1, 2, 4, 8, 16, 32];    # Normal sizes.
        self.acbSizes  = acbSizes;

    def get(self, sValue):
        """
        Converts sValue to the shortest normal sized byte representation.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The two entry form is for AND+OR pairs: the first entry is what to AND
        with the field, the second what to OR with it.  This base
        implementation always returns the single entry form, with the bytes
        stored least significant first.

        Raises BadValue if the value is empty or not an integer.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # A leading sign forces sign extension and shifts the '0x' prefix by one.
        fHasSign    = sValue[0] in ('-', '+');
        fSignExtend = fHasSign or not self.fUnsigned;
        offPrefix   = 1 if fHasSign else 0;
        fHex        = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  Negative values are turned into two's
        # complement by formatting (-n - 1) and inverting each digit.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';             # Python 2 long suffix.
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[chDigit] for chDigit in sHex]);
            # Make sure the top bit is set so the value stays negative.
            if fSignExtend and sHex[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sHex = 'f' + sHex;

        # Pick the smallest natural size holding the digits, or else the
        # smallest multiple of the largest natural size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad up to the natural size with the appropriate filler digit.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('f' if fNegative else '0') * cNeeded) + sHex;

        # Walk the digit pairs from the end so the lowest byte comes first.
        abValue = bytearray([int(sHex[offEnd - 2 : offEnd], 16) for offEnd in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Checks sValue by running it through get().

        Returns True if the value is okay, the error message otherwise.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue yields an AND+OR pair.  Never the case for the base type.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics looked up in
    g_kdEFlagsMnemonics.  Mnemonics mapping to a '!' prefixed constant clear
    the flag, the others set it.
    """

    ## The mnemonics that make a value an AND+OR pair (see isAndOrPair).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Translates the mnemonic list into flag masks.

        Returns the single entry form with the set-mask when nothing is
        cleared, otherwise a (clear-mask entry, set-mask entry) pair.
        Raises BadValue on unknown mnemonics.
        """
        fToSet   = 0;
        fToClear = 0;
        for sMnemonic in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sMnemonic, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sMnemonic, sValue))
            if sConstant[0] == '!':
                fToClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fToSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fToSet,));
        if fToClear == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (fToClear,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any of the kdZeroValueFlags mnemonics
        (plain substring search).
        """
        return any(sValue.find(sZeroFlag) >= 0 for sZeroFlag in self.kdZeroValueFlags);
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a constant
    dictionary (used for CR0, CR4 and XCR0 in this file).

    Values are comma separated flag names; each one is uppercased, prefixed
    with sConstantPrefix and looked up in kdConstantsAndValues.
    """

    ## Not referenced by this class' own methods.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        ORs the values of the named flags together and returns the result in
        the single entry form of TestType.get.  Raises BadValue on unknown
        flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fFlagValue = self.kdConstantsAndValues.get(sConstant, None);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlagValue;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance just records the field name, the assignment operator, the
    value string and the type name; all four must be strings, the field must
    be a key of kdFields and the operator one of kasOperators.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is created with the TestType default fUnsigned=True,
    ##               which makes it behave exactly like 'uint' - confirm intent.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Historical misspelling of 'xmm15.dw0' (underscore instead of dot);
        # kept so anything already using this key continues to be accepted.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Records one modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all
        four arguments must be strings.  Violations trip assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1267
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares one of the kdVariables variables against one of the
    values that are valid for it.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## Each one translates into a variable expression.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Records the selector after asserting that the variable, the compare
        operator and the value (for that variable) are all known.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sValue    = sValue;
        self.sOp       = sOp;
        self.sVariable = sVariable;
1364
1365
class InstructionTest(object):
    """
    Instruction test.

    Holds the selectors, input modifiers and output modifiers of one test
    belonging to an instruction.
    """

    def __init__(self, oInstr):
        """ Creates an empty test for the given instruction. """
        self.oInstr         = oInstr    # The instruction the test belongs to.
        self.aoSelectors    = []        # type: list(TestSelector)
        self.aoInputs       = []        # type: list(TestInOut)
        self.aoOutputs      = []        # type: list(TestInOut)

    @staticmethod
    def _modifierToWord(oModifier):
        """ Formats one input/output modifier as 'field<op>value:type'. """
        return '%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,);

    def toString(self, fRepr = False):
        """
        Converts it to string representation.

        The words are: the selectors followed by '/' (only when there are
        selectors), the inputs, '->' and the outputs, all space separated.
        With fRepr set the result is wrapped in angle brackets.
        """
        asWords = ['%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,)
                   for oSelector in self.aoSelectors];
        if asWords:
            asWords.append('/');

        asWords.extend([self._modifierToWord(oModifier) for oModifier in self.aoInputs]);
        asWords.append('->');
        asWords.extend([self._modifierToWord(oModifier) for oModifier in self.aoOutputs]);

        sText = ' '.join(asWords);
        return '<' + sText + '>' if fRepr else sText;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (a g_kdOpLocations key) with an operand type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        """ Records the operand after asserting both names are known. """
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if the type starts with 'E', 'G' or 'M', i.e. some form of ModR/M encoding. """
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything recorded about one instruction: encoding, operands,
    flag effects, tests, implementation names and where it was found in the
    source file.
    """

    def __init__(self, sSrcFile, iLine):
        """ Creates a blank instruction found at line iLine of sSrcFile. """
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'name=value' pairs separated by '; ', leaving out fields
        whose value is None.  With fRepr set the result is wrapped in angle
        brackets.
        """
        # Collect (name, value) pairs in output order; optional fields are
        # only added when they carry something.
        aoPairs = [('opcode', self.sOpcode),];
        if self.sPrefix:
            aoPairs.append(('prefix', self.sPrefix));
        aoPairs.append(('mnemonic', self.sMnemonic));
        for iOperand, oOperand in enumerate(self.aoOperands):
            aoPairs.append(('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)));
        if self.aoMaps:
            aoPairs.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        aoPairs.append(('encoding', self.sEncoding));
        if self.dHints:
            aoPairs.append(('hints', ','.join(self.dHints.keys())));
        aoPairs.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            aoPairs.append(('cpuid', ','.join(self.asCpuIds)));
        aoPairs.append(('group', self.sGroup));
        if self.fUnused:
            aoPairs.append(('unused', 'True'));
        if self.fInvalid:
            aoPairs.append(('invalid', 'True'));
        aoPairs.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ]);
        if self.fStub:
            aoPairs.append(('fStub', 'True'));
        if self.fUdStub:
            aoPairs.append(('fUdStub', 'True'));
        if self.cOpTags:
            aoPairs.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            aoPairs.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            aoPairs.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        sBody = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aoPairs if sValue is not None]);
        if fRepr:
            return '<' + sBody + '>';
        return sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        Arguments that are given (truthy) replace the corresponding attribute
        of the copy.  List and dictionary attributes get a shallow copy of
        their own, except the flag lists which are shared with the original.
        The copy refers back to this object via oParent.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent    = self;

        # The attributes that can be overridden by the caller.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix if sPrefix else self.sPrefix;
        oCopy.sOpcode    = sOpcode if sOpcode else self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;

        # Attributes taken over as-is (same object).
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid',
                      'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists getting a shallow copy (aoOperands, aoTests: deeper copy?).
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));

        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Understands the '0xNN' full byte form as well as the '/r', '11/r'
        and '!11/r' forms (r being a single digit), the latter three giving
        r shifted into the reg field with mod set to 0, 3 and 2 respectively.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16);

        # The forms ending with a /r digit: (lead-in, mod bits to OR in).
        ## @todo the !11/r form doesn't really work (always yields the same mod value)...
        if sOpcode[-1:].isdigit():
            for sLeadIn, fModBits in (('/', 0), ('11/', 0xc0), ('!11/', 0x80),):
                if len(sOpcode) == len(sLeadIn) + 1 and sOpcode.startswith(sLeadIn):
                    return (int(sOpcode[-1:]) << 3) | fModBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        (which must not be a '!' one) and then into a bit mask via
        g_kdX86EFlagsConstants.
        """
        fMask = 0;
        for sFlag in (asFlags or []):
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            fMask |= g_kdX86EFlagsConstants[sConstant];
        return fMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps);
1663
1664
## All the instructions.
## Starts out empty here.
g_aoAllInstructions = []            # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## Starts out empty here.
g_dAllInstructionsByStat = {}       # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## The values are lists of instructions going by the type annotation.
g_dAllInstructionsByFunction = {}   # type: dict(list(Instruction))

## Instructions tagged by oponlytest
## Starts out empty here.
g_aoOnlyTestInstructions = []       # type: list(Instruction)
1677## Instruction maps.
1678g_aoInstructionMaps = [
1679 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1680 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1681 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1682 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1683 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1684 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1685 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1686 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1687 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1688 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1689 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1690 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1691 ## @todo g_apfnEscF1_E0toFF
1692 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1693 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1694 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1695 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1696 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1698 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1699 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1700
1701 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1702 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1703 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1704 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1705 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1706 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1707 ## @todo What about g_apfnGroup9MemReg?
1708 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1709 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1710 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1711 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1712 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1713 ## @todo What about g_apfnGroup15RegReg?
1714 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1715 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1716 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1717
1718 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1719 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1720
1721 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1722 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1724 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1725 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1726 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1727
1728 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1729 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1730
1731 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1732 InstructionMap('xopmap8', sEncoding = 'xop8'),
1733 InstructionMap('xopmap9', sEncoding = 'xop9'),
1734 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1735 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1736 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737 InstructionMap('xopmap10', sEncoding = 'xop10'),
1738 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1739];
## Instruction maps keyed by the map's sName attribute.
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## Instruction maps keyed by the map's sIemName attribute.
## Note! Maps sharing the same sIemName value overwrite each other here, the last one in the list wins.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    A decoder function found in the instruction source files.

    Records where the function lives and, once complete() has been called,
    where it ends and the raw lines it consists of.  Mainly used for scoped
    searches for variables used in microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of.
        self.asLines    = [] # type: list(str)

    def complete(self, iEndLine, asLines):
        """
        Marks the function as complete by recording its end line and raw lines.

        May only be called once per function (asserted).
        """
        assert self.iEndLine == -1;
        self.asLines  = asLines;
        self.iEndLine = iEndLine;
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    A single statement inside a microcode block.
    """
    def __init__(self, sName, asParams):
        ## 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (list of strings).
        self.asParams = asParams;
        ## Free for the user of the statement; initialized to None.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as a source line, indented by cchIndent spaces
        and terminated by ';' and a newline.
        """
        sArgList = ', '.join(self.asParams);
        return ''.join([' ' * cchIndent, self.sName, '(', sArgList, ');\n']);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated renderCode() output of all statements in aoStmts.
        """
        asRendered = [];
        for oCurStmt in aoStmts:
            asRendered.append(oCurStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Searches aoStmts, descending into the if and else branches of
        conditional statements, and returns the first statement whose name
        is in dNames.  Returns None if there is no such statement.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oCurStmt in aoStmts:
            if oCurStmt.sName in dNames:
                return oCurStmt;
            if not isinstance(oCurStmt, McStmtCond):
                continue;
            # The if-branch is searched before the else-branch.
            oFound = (   McStmt.findStmtByNames(oCurStmt.aoIfBranch, dNames)
                      or McStmt.findStmtByNames(oCurStmt.aoElseBranch, dNames));
            if oFound:
                return oFound;
        return None;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1820
class McStmtCond(McStmt):
    """
    Conditional statement base class (IEM_MC_IF_XXX), holding the statements
    of the if-branch and of the optional else-branch.
    """
    def __init__(self, sName, asParams):
        super(McStmtCond, self).__init__(sName, asParams);
        ## Statements executed when the condition holds.
        self.aoIfBranch   = [];
        ## Statements of the else-branch; empty when there is none.
        self.aoElseBranch = [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with both branches indented four more spaces.
        The IEM_MC_ELSE() part is left out when the else-branch is empty.
        """
        sPadding = ' ' * cchIndent;
        asParts  = [
            sPadding + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sPadding + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sPadding + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1838
class McStmtVar(McStmt):
    """ Variable declaration statement: IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST """
    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        super(McStmtVar, self).__init__(sName, asParams);
        ## The variable name.
        self.sVarName    = sVarName;
        ## The variable type.
        self.sType       = sType;
        ## The constant value, None if not const.
        self.sConstValue = sConstValue;
1846
class McStmtArg(McStmtVar):
    """ Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        super(McStmtArg, self).__init__(sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        # Only the two reference kinds listed above are known.
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """ Call statement: IEM_MC_CALL_* """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        super(McStmtCall, self).__init__(sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn     = iFnParam;
        ## Index into asParams of the entry right after the function.
        self.idxParams = iFnParam + 1;
        ## The function being called (asParams[iFnParam]).
        self.sFn       = asParams[iFnParam];
        ## The asParams entry selected by iRcNameParam, None if that index is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement, the raw code is kept as the one and only parameter.
    """
    def __init__(self, sCode, fDecode, sName = 'C++'):
        super(McCppGeneric, self).__init__(sName, [sCode,]);
        ## The decode flag given by the creator, selects the annotation in renderCode().
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code indented by cchIndent spaces, appending a
        '// C++ decode' or '// C++ normal' annotation (picked by fDecode) in
        front of every newline.
        """
        sAnnotatedEol = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCodeLine     = ' ' * cchIndent + self.asParams[0] + '\n';
        return sCodeLine.replace('\n', sAnnotatedEol);
1881
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement, the condition expression is kept as the one and
    only parameter and the statement name is 'C++/if'.
    """
    def __init__(self, sCode, fDecode):
        super(McCppCond, self).__init__('C++/if', [sCode,]);
        ## The decode flag given by the creator, selects the annotation in renderCode().
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement as 'if (...) { ... } [else { ... }]' with the
        braces on lines of their own and the branches indented four more
        spaces.  The 'if' and 'else' lines get a '// C++ decode' or
        '// C++ normal' annotation picked by fDecode.
        """
        sPadding = ' ' * cchIndent;
        sSuffix  = '// C++ decode' if self.fDecode else '// C++ normal';
        asOut = [
            sPadding + 'if (' + self.asParams[0] + ') ' + sSuffix + '\n',
            sPadding + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sPadding + '}\n',
        ];
        if self.aoElseBranch:
            asOut.extend([
                sPadding + 'else ' + sSuffix + '\n',
                sPadding + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sPadding + '}\n',
            ]);
        return ''.join(asOut);
1902
class McCppPreProc(McCppGeneric):
    """
    C++/C preprocessor directive, the statement name is 'C++/preproc' and the
    decode flag is always False.
    """
    def __init__(self, sCode):
        super(McCppPreProc, self).__init__(sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Renders the directive followed by a newline.  The indentation is
        ignored, so the directive always starts in the first column.
        """
        _ = cchIndent;
        return ''.join((self.asParams[0], '\n'));
1912
1913
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END).

    Holds the location and the raw lines of a block.  The statements in
    aoStmts are only available after decode() has been called.
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
        self.sSrcFile     = sSrcFile;           ##< The source file containing the block.
        self.iBeginLine   = iBeginLine;         ##< The line with the IEM_MC_BEGIN statement.
        self.offBeginLine = offBeginLine;       ##< The offset of the IEM_MC_BEGIN statement within the line.
        self.iEndLine     = -1;                 ##< The line with the IEM_MC_END statement.
        self.offEndLine   = 0;                  ##< The offset of the IEM_MC_END statement within the line.
        self.oFunction    = oFunction;          ##< The function the block resides in.
        self.sFunction    = oFunction.sName;    ##< The name of the function the block resides in. DEPRECATED.
        self.iInFunction  = iInFunction;        ##< The block number within the function.
        ## The block indentation.  Falls back on offBeginLine when cchIndent is None or zero.
        self.cchIndent    = cchIndent if cchIndent else offBeginLine;
        self.asLines      = [] # type: list(str) ##< The raw lines the block is made up of.
        ## Decoded statements in the block.
        self.aoStmts      = [] # type: list(McStmt)

    def complete(self, iEndLine, offEndLine, asLines):
        """
        Completes the microcode block by recording where it ends and the raw
        lines it is made up of.  May only be called once (asserted).
        """
        assert self.iEndLine == -1;
        self.iEndLine     = iEndLine;
        self.offEndLine   = offEndLine;
        self.asLines      = asLines;

    def raiseDecodeError(self, sRawCode, off, sMessage):
        """
        Raises a decoding error (ParserException) for offset off in sRawCode.

        The offset is converted to a line (relative to iBeginLine) and a
        one-based column number for the message.
        """
        offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
        iLine          = sRawCode.count('\n', 0, off);
        raise ParserException('%s:%d:%d: parsing error: %s'
                              % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));

    def raiseStmtError(self, sName, sMessage):
        """ Raises a statement parser error (ParserException), reported against the first line of the block. """
        raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));

    def checkStmtParamCount(self, sName, asParams, cParamsExpected):
        """
        Checks the parameter count, raising a ParserException if it doesn't match.
        Returns True otherwise.
        """
        if len(asParams) != cParamsExpected:
            raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                                  % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
        return True;

    @staticmethod
    def parseMcGeneric(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmt object. """
        _ = oSelf;
        return McStmt(sName, asParams);

    @staticmethod
    def parseMcGenericCond(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmtCond object. """
        _ = oSelf;
        return McStmtCond(sName, asParams);

    @staticmethod
    def parseMcBegin(oSelf, sName, asParams):
        """ IEM_MC_BEGIN """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McBlock.parseMcGeneric(oSelf, sName, asParams);

    @staticmethod
    def parseMcArg(oSelf, sName, asParams):
        """ IEM_MC_ARG - parameters: type, name, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));

    @staticmethod
    def parseMcArgConst(oSelf, sName, asParams):
        """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);

    @staticmethod
    def parseMcArgLocalRef(oSelf, sName, asParams):
        """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');

    @staticmethod
    def parseMcArgLocalEFlags(oSelf, sName, asParams):
        """
        IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local name, argument number.

        Returns two statements, see note in the code.
        """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
        #       The parameter list of the latter must use the order parseMcArgLocalRef
        #       expects (type, name, local, argument number) or it renders incorrectly.
        return (
            McStmtVar('IEM_MC_LOCAL_VAR', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
            McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
                      'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
        );

    @staticmethod
    def parseMcLocal(oSelf, sName, asParams):
        """ IEM_MC_LOCAL - parameters: type, name. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McStmtVar(sName, asParams, asParams[0], asParams[1]);

    @staticmethod
    def parseMcLocalConst(oSelf, sName, asParams):
        """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);

    @staticmethod
    def _parseMcCallCommon(oSelf, sName, asParams, iFnParam, iRcNameParam = -1):
        """
        Common worker for the IEM_MC_CALL_XXX_<N> parsers.

        The last character of the statement name gives the number of call
        arguments, these follow the function which is parameter iFnParam.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, iFnParam + 1 + cArgs);
        return McStmtCall(sName, asParams, iFnParam, iRcNameParam);

    @staticmethod
    def parseMcCallAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AIMPL_3|4 - parameter 0 is the return variable, parameter 1 the function. """
        return McBlock._parseMcCallCommon(oSelf, sName, asParams, 1, 0);

    @staticmethod
    def parseMcCallVoidAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_VOID_AIMPL_0|1|2|3|4 """
        return McBlock._parseMcCallCommon(oSelf, sName, asParams, 0);

    @staticmethod
    def parseMcCallAvxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AVX_AIMPL_2|3 """
        return McBlock._parseMcCallCommon(oSelf, sName, asParams, 0);

    @staticmethod
    def parseMcCallFpuAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
        return McBlock._parseMcCallCommon(oSelf, sName, asParams, 0);

    @staticmethod
    def parseMcCallMmxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_MMX_AIMPL_2|3 """
        return McBlock._parseMcCallCommon(oSelf, sName, asParams, 0);

    @staticmethod
    def parseMcCallSseAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_SSE_AIMPL_2|3 """
        return McBlock._parseMcCallCommon(oSelf, sName, asParams, 0);

    @staticmethod
    def parseMcCallCImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
        return McBlock._parseMcCallCommon(oSelf, sName, asParams, 0);

    @staticmethod
    def stripComments(sCode):
        """
        Returns sCode with C and C++ comments removed.

        An unterminated comment is taken to run to the end of sCode, in which
        case the result is also stripped of trailing whitespace.

        Note! Does not take string literals into account.
        """
        off = 0;
        while off < len(sCode):
            off = sCode.find('/', off);
            if off < 0 or off + 1 >= len(sCode):
                break;

            if sCode[off + 1] == '/':
                # C++ comment.  The newline terminating it is kept.
                offEnd = sCode.find('\n', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sCode = sCode[ : off] + sCode[offEnd : ];
                off += 1;

            elif sCode[off + 1] == '*':
                # C comment.  Replaced by a space unless there already is whitespace next to it.
                offEnd = sCode.find('*/', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sSep = ' ';
                if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
                    sSep = '';
                sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
                off += len(sSep);

            else:
                # Not a comment.
                off += 1;
        return sCode;

    @staticmethod
    def extractParam(sCode, offParam):
        """
        Extracts the parameter value at offParam in sCode.

        Returns stripped value and the end offset of the terminating ',' or ')'.
        The offset is len(sCode) if no terminator was found.  Separators
        inside nested parentheses do not terminate the parameter.
        """
        cNesting = 0;
        offStart = offParam;
        while offParam < len(sCode):
            ch = sCode[offParam];
            if ch == '(':
                cNesting += 1;
            elif ch == ')':
                if cNesting == 0:
                    break;
                cNesting -= 1;
            elif ch == ',' and cNesting == 0:
                break;
            offParam += 1;
        return (sCode[offStart : offParam].strip(), offParam);

    @staticmethod
    def extractParams(sCode, offOpenParen):
        """
        Parses a parameter list.

        Returns the list of parameter values and the offset of the closing parentheses.
        Returns (None, len(sCode)) if no closing parentheses was found.
        """
        assert sCode[offOpenParen] == '(';
        asParams = [];
        off = offOpenParen + 1;
        while off < len(sCode):
            ch = sCode[off];
            if ch.isspace():
                off += 1;
            elif ch != ')':
                (sParam, off) = McBlock.extractParam(sCode, off);
                asParams.append(sParam);
                if off >= len(sCode):
                    break; # Ran off the end, so the list is unterminated.  Let the caller report it.
                if sCode[off] == ',':
                    off += 1;
            else:
                return (asParams, off);
        return (None, off);

    @staticmethod
    def findClosingBraces(sCode, off, offStop):
        """
        Finds the matching '}' for the '{' at off in sCode, not looking at
        anything at or beyond offStop.

        Returns offset of the matching '}' on success, otherwise -1.

        Note! Does not take comments into account.
        """
        cDepth = 1;
        off += 1;
        while off < offStop:
            offClose = sCode.find('}', off, offStop);
            if offClose < 0:
                break;
            # Every '{' passed on the way to this '}' opens another level that must be closed first.
            cDepth += sCode.count('{', off, offClose);
            cDepth -= 1;
            if cDepth == 0:
                return offClose;
            off = offClose + 1;
        return -1;

    @staticmethod
    def countSpacesAt(sCode, off, offStop):
        """ Returns the number of consecutive whitespace characters at off in sCode, stopping at offStop. """
        offStart = off;
        while off < offStop and sCode[off].isspace():
            off += 1;
        return off - offStart;

    @staticmethod
    def skipSpacesAt(sCode, off, offStop):
        """ Returns first offset at or after off for a non-space character, offStop at the most. """
        return off + McBlock.countSpacesAt(sCode, off, offStop);

    @staticmethod
    def isSubstrAt(sStr, off, sSubStr):
        """ Returns true if sSubStr is found at off in sStr. """
        return sStr[off : off + len(sSubStr)] == sSubStr;

    ## Matches the C/C++ control statement keywords (if, else, while, for, do).
    koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches references to the listed iem.s.xxx members.
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');

    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        A negative offStop means the end of sRawCode.  iLevel is the nesting
        level, it is incremented for each recursive call decoding a branch of
        a conditional statement.

        Returns list of McStmt instances.
        Raises ParserException on failure.

        Note! Single characters are checked using one character slices
              (sRawCode[off : off + 1]) in a number of places, as these
              produce an empty string rather than an IndexError when the
              code ends prematurely.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                ch = sRawCode[off + 1 : off + 2];
                if ch == '/': # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif ch == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Back up over the whitespace preceding the '{'.  There need not be any,
                    # so offEnd must not be moved unconditionally or we'll lose the ')'.
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.  A handler may return more than one statement.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1 : offBlock1 + 1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off : off + 1] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off : off + 1] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2 : offBlock2 + 1] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off : off + 1] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off : off + 1] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off : off + 1] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    # Preprocessor directive, it ends with the line.
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;
                else:
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                              or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                              );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1 : offBlock1 + 1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if (    self.isSubstrAt(sRawCode, off, 'else')
                        and sRawCode[off + len('else') : off + len('else') + 1].isspace()):
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2 : offBlock2 + 1] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

        return aoStmts;

    def decode(self):
        """
        Decodes the block, populating self.aoStmts.
        Returns the statement list.
        Raises ParserException on failure.
        """
        self.aoStmts = self.decodeCode(''.join(self.asLines));
        return self.aoStmts;
2436
2437
2438## IEM_MC_XXX -> parser dictionary.
2439# The raw table was generated via the following command
2440# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2441# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
2442g_dMcStmtParsers = {
2443 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2444 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
2445 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2446 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
2447 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2448 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
2449 'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
2450 'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
2451 'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
2452 'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
2453 'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
2454 'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
2455 'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
2456 'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
2457 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
2458 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
2459 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
2460 'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
2461 'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
2462 'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
2463 'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
2464 'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
2465 'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
2466 'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
2467 'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
2468 'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
2469 'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
2470 'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
2471 'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
2472 'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
2473 'IEM_MC_ARG': McBlock.parseMcArg,
2474 'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
2475 'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
2476 'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
2477 'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
2478 'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
2479 'IEM_MC_BEGIN': McBlock.parseMcGeneric,
2480 'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
2481 'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
2482 'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
2483 'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
2484 'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
2485 'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
2486 'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
2487 'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
2488 'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
2489 'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
2490 'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
2491 'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
2492 'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
2493 'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
2494 'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
2495 'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
2496 'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
2497 'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
2498 'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
2499 'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
2500 'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
2501 'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
2502 'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
2503 'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
2504 'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
2505 'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
2506 'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
2507 'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
2508 'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
2509 'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
2510 'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
2511 'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
2512 'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
2513 'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
2514 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2515 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2516 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2517 'IEM_MC_DEFER_TO_CIMPL_0': McBlock.parseMcGeneric,
2518 'IEM_MC_DEFER_TO_CIMPL_1': McBlock.parseMcGeneric,
2519 'IEM_MC_DEFER_TO_CIMPL_2': McBlock.parseMcGeneric,
2520 'IEM_MC_DEFER_TO_CIMPL_3': McBlock.parseMcGeneric,
2521 'IEM_MC_END': McBlock.parseMcGeneric,
2522 'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
2523 'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
2524 'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
2525 'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
2526 'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
2527 'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
2528 'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
2529 'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
2530 'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
2531 'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
2532 'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
2533 'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
2534 'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
2535 'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
2536 'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
2537 'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
2538 'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
2539 'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
2540 'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
2541 'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
2542 'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
2543 'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
2544 'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
2545 'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
2546 'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
2547 'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
2548 'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
2549 'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
2550 'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
2551 'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
2552 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2553 'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
2554 'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
2555 'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
2556 'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
2557 'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
2558 'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
2559 'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
2560 'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
2561 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2562 'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
2563 'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
2564 'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
2565 'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
2566 'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
2567 'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
2568 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
2569 'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
2570 'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
2571 'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
2572 'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
2573 'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
2574 'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
2575 'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
2576 'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
2577 'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
2578 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
2579 'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
2580 'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
2581 'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
2582 'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
2583 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
2584 'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
2585 'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
2586 'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
2587 'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
2588 'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
2589 'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
2590 'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
2591 'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
2592 'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
2593 'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
2594 'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
2595 'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
2596 'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
2597 'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
2598 'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
2599 'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
2600 'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
2601 'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
2602 'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
2603 'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
2604 'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
2605 'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
2606 'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
2607 'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
2608 'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
2609 'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
2610 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
2611 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
2612 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
2613 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
2614 'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
2615 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
2616 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2617 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
2618 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
2619 'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
2620 'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
2621 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2622 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2623 'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
2624 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2625 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2626 'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
2627 'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2628 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
2629 'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
2630 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
2631 'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
2632 'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
2633 'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
2634 'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
2635 'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
2636 'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
2637 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2638 'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
2639 'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
2640 'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
2641 'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
2642 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2643 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2644 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2645 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
2646 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
2647 'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
2648 'IEM_MC_LOCAL': McBlock.parseMcLocal,
2649 'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
2650 'IEM_MC_MAYBE_RAISE_AESNI_RELATED_XCPT': McBlock.parseMcGeneric,
2651 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
2652 'IEM_MC_MAYBE_RAISE_AVX2_RELATED_XCPT': McBlock.parseMcGeneric,
2653 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2654 'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
2655 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
2656 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
2657 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT': McBlock.parseMcGeneric,
2658 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_EX': McBlock.parseMcGeneric,
2659 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
2660 'IEM_MC_MAYBE_RAISE_PCLMUL_RELATED_XCPT': McBlock.parseMcGeneric,
2661 'IEM_MC_MAYBE_RAISE_SHA_RELATED_XCPT': McBlock.parseMcGeneric,
2662 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2663 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
2664 'IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT': McBlock.parseMcGeneric,
2665 'IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2666 'IEM_MC_MAYBE_RAISE_SSE41_RELATED_XCPT': McBlock.parseMcGeneric,
2667 'IEM_MC_MAYBE_RAISE_SSE42_RELATED_XCPT': McBlock.parseMcGeneric,
2668 'IEM_MC_MAYBE_RAISE_SSSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2669 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2670 'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
2671 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
2672 'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
2673 'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
2674 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
2675 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2676 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2677 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
2678 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
2679 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2680 'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
2681 'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
2682 'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
2683 'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
2684 'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
2685 'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
2686 'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
2687 'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
2688 'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
2689 'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
2690 'IEM_MC_POP_U16': McBlock.parseMcGeneric,
2691 'IEM_MC_POP_U32': McBlock.parseMcGeneric,
2692 'IEM_MC_POP_U64': McBlock.parseMcGeneric,
2693 'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
2694 'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
2695 'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
2696 'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
2697 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2698 'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
2699 'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
2700 'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
2701 'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
2702 'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
2703 'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
2704 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
2705 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
2706 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2707 'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
2708 'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
2709 'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
2710 'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
2711 'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
2712 'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
2713 'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
2714 'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
2715 'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
2716 'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
2717 'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
2718 'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
2719 'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
2720 'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
2721 'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
2722 'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
2723 'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
2724 'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
2725 'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
2726 'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
2727 'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
2728 'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
2729 'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
2730 'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
2731 'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
2732 'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
2733 'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
2734 'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
2735 'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
2736 'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
2737 'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
2738 'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
2739 'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
2740 'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
2741 'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
2742 'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
2743 'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
2744 'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
2745 'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
2746 'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
2747 'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
2748 'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
2749 'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
2750 'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2751 'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
2752 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2753 'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
2754 'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
2755 'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
2756 'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
2757 'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
2758 'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
2759 'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
2760 'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
2761 'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
2762 'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
2763 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
2764 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
2765 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
2766 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
2767 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
2768 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
2769 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
2770 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
2771 'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
2772 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2773 'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
2774 'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
2775 'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
2776 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2777 'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
2778 'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
2779 'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
2780 'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
2781 'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
2782 'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
2783 'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
2784 'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
2785 'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
2786 'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
2787 'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
2788 'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
2789 'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
2790 'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
2791 'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
2792 'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
2793 'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
2794 'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
2795 'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
2796 'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
2797 'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
2798 'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
2799 'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
2800 'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
2801 'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
2802 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2803 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2804 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
2805 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2806 'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
2807 'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
2808 'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
2809 'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
2810 'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
2811 'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
2812 'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
2813 'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
2814 'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
2815 'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
2816 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
2817 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2818};
2819
## List of microcode blocks (McBlock instances); initially empty.
g_aoMcBlocks = [] # type: list(McBlock)
2822
2823
2824
class ParserException(Exception):
    """
    Exception type used by the parser for reporting fatal errors.

    It carries nothing but the message text.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2829
2830
2831class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2832 """
2833 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2834 """
2835
2836 ## @name Parser state.
2837 ## @{
2838 kiCode = 0;
2839 kiCommentMulti = 1;
2840 ## @}
2841
class Macro(object):
    """
    A macro definition: name, optional parameter list and body text.

    Macros created with a non-empty parameter list get a compiled regular
    expression (oReArgMatch) that finds parameter references in the body,
    including any '##' token pasting operator adjacent to them.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName       = sName;   ##< The macro name.
        self.asArgs      = asArgs;  ##< None if simple macro, list of parameters otherwise.
        self.sBody       = sBody;   ##< The macro body text.
        self.iLine       = iLine;   ##< Line number given by the creator.
        ## Matches a parameter name (group 2) together with an optional
        ## '##' operator before (group 1) and after (group 3) it.  None when
        ## there are no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): isspace() already excludes '(', so the second test is
        #               redundant; the intended condition may have been
        #               different - confirm before changing the spacing.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is not used.  asArgs must hold exactly one value per macro
        parameter; for a macro without parameters it must be None or empty.

        Returns the expanded body string.
        """
        _ = oParent;
        sBody = self.sBody;

        if not self.oReArgMatch:
            assert not asArgs;
            return sBody;

        assert len(asArgs) == len(self.asArgs);
        dArgs = dict(zip(self.asArgs, asArgs));

        #
        # Locate all parameter references in the *original* body and stitch
        # the result together from the untouched text and the values.
        #
        # Searching the partially expanded body instead (as was done
        # previously) breaks 'a ## b': the '##' is consumed together with 'a',
        # leaving the value of 'a' directly in front of 'b', and unless that
        # value ends with a non-word character there is no word boundary
        # left for matching 'b'.  Working on the original text also makes
        # sure substituted values are never scanned for parameter names.
        #
        asPieces  = [];
        offCopied = 0;
        for oMatch in self.oReArgMatch.finditer(sBody):
            sPre = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sPre = ' ';
            sPost = '';
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sPost = ' ';
            asPieces.append(sBody[offCopied : oMatch.start()]);
            asPieces.append(sPre + dArgs[oMatch.group(2)] + sPost);
            offCopied = oMatch.end();
        asPieces.append(sBody[offCopied : ]);

        return ''.join(asPieces);
2883
2884
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Initializes the parser state.

    sSrcFile is the source file name used when formatting error messages,
    asLines the lines of that file, and sDefaultMap the name of the default
    instruction map, which must be a key in g_dInstructionMaps.  If
    oInheritMacrosFrom is given, a copy of its macro dictionary and its
    macro matching expression are taken over.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: list(Instruction)
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Note! All patterns are raw strings so the regex escapes (\s, \d, \[,
    #       \A, ...) are not taken for (invalid) string escape sequences.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END)\s*\(');

    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Maps comment tags to the methods handling them.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both the '@optestN' and the '@optest[N]' form.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];
2970
def raiseError(self, sMessage):
    """
    Raises a ParserException for sMessage, prefixing it with the source file
    name and the current line number (self.iLine).
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
2976
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException like raiseError does, except that the reported
    line number is self.iCommentLine + iLineInComment.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    sFullMsg    = "%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,);
    raise ParserException(sFullMsg);
2982
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2990
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line number in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2998
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported line
    number is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    sEntry      = u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3006
def printErrors(self):
    """
    Writes all recorded errors to stderr (nothing is written if there are none).
    Returns the number of recorded errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3015
def debug(self, sMessage):
    """
    Prints sMessage to stderr with a 'debug: ' prefix, but only when
    self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3022
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by a
    single space.

    An error is recorded if a comment start or end marker is left over, as
    that indicates a multi-line comment which is not handled here.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3033
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line starting the table; the table name and an optional
    explicit size (256 or 32) are extracted from it.  The table body is read
    from self.asLines, starting at self.iLine.

    Returns True if the whole table was consumed and has the expected number
    of entries.  Otherwise an error is recorded and False is returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those two tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match(r'^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.  The list
        # is walked backwards so inserting and deleting entries does not
        # disturb the indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Pick the first instruction that either matches the
                    # opcode + prefix or has them unset (filling them in).
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # None fits, so add a copy of the first one for this
                    # map, opcode and prefix.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3139
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and appends it to both
    g_aoAllInstructions and self.aoCurInstrs.

    The instruction is given iLine as line number, or self.iLine if iLine
    is None.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3148
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done if oInstr already has a mnemonic.  Otherwise the first
    underscore separated word of sStats, lower cased, becomes the mnemonic.
    If oInstr has no operands yet, the remaining words are looked up in
    g_kdOpTypes and added as operands.

    Returns True on success, False if an unknown operand type word is
    encountered (operands added before that point are kept).
    """
    # Nothing to derive when the mnemonic is already known.
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    # Only derive operands if there are candidates and none were specified.
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            # Unknown operand type; failing quietly (no error is recorded).
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3164
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by deriving missing attributes
    and registering the instruction in the maps and lookup dictionaries.

    oInstr is the instruction to complete and iLine the line number to record
    as its completion line.  The instruction must not have been completed
    before (asserted).

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Note! There is currently no processing that is specific to either
    #       instructions specified via op-tags (cOpTags > 0) or to the
    #       unspecified legacy stuff, where we generally only got a few
    #       things to go on, like:
    #           /** Opcode 0x0f 0x00 /0. */
    #           FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    sFnPrefix = 'iemOp_';

    #
    # Common defaults.  The order of the steps below matters, as the later
    # ones use what the earlier ones derived.
    #

    # Guess mnemonic and operands from the stats name, or failing that from
    # the function name, if the mnemonic is missing.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats;
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace(sFnPrefix, '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or else
    # from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace(sFnPrefix, '');
        elif oInstr.sMnemonic is not None:
            asNameParts = [oInstr.sMnemonic,] + [oOp.sType for oOp in oInstr.aoOperands if oOp.sType];
            oInstr.sStats = '_'.join(asNameParts);

    # Derive the IEM function name from mnemonic and operand types, or else
    # from the statistics name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            asNameParts = [oInstr.sMnemonic,] + [oOp.sType for oOp in oInstr.aoOperands if oOp.sType];
            oInstr.sFunction = sFnPrefix + '_'.join(asNameParts);
        elif oInstr.sStats:
            oInstr.sFunction = sFnPrefix + oInstr.sStats;

    #
    # Apply the default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive the encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                tsVexAndPlain = ('VEX.ModR/M', 'ModR/M');
            else:
                tsVexAndPlain = ('VEX.fixed', 'fixed');
            oInstr.sEncoding = tsVexAndPlain[0] if oInstr.onlyInVexMaps() else tsVexAndPlain[1];
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to the function indexed dictionary.  Multiple instructions per
    # function are allowed.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
3270
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the per-instruction
    parser state.

    iLineInComment, when given, is added to self.iCommentLine to form the
    completion line; otherwise self.iLine is used.  When fEndOfFunction is
    True, the current function (if any) is completed too and the function
    related state is reset.

    Returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iDoneLine = self.iLine;
        else:
            iDoneLine = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iDoneLine);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Reset the comment and instruction state.
    self.sComment = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    # End of function: hand the function its source lines and forget it.
    if self.oCurFunction:
        iFirst = self.oCurFunction.iBeginLine - 1;
        self.oCurFunction.complete(self.iLine, self.asLines[iFirst : self.iLine]);
    self.oCurFunction = None;
    self.iMcBlockInFunc = 0;
    return True;
3291
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the attribute sAttrib of all the current instructions to oValue.

    Unless fOverwrite is True, only attributes that currently are None get
    replaced; any other existing value is left alone.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
3303
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue.

    The array is padded with None entries as needed to make iEntry valid.
    Unless fOverwrite is True, only entries that currently are None get
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
3315
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line of the comment starts with the word 'Opcode' followed
    by a word with a '0x' or '0X' prefix, everything after 'Opcode' is
    recorded as the 'sRawOldOpcodes' attribute of the current instructions
    (via setInstrunctionAttrib, so existing values are not overwritten).
    Upper case '0X' prefixes are normalized to '0x'.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if      len(asWords) >= 2 \
        and asWords[0] == 'Opcode' \
        and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word, keeping the opcode bytes and whatever follows.
        asWords = asWords[1:];
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
3331
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed
    and counts the tag.

    A new instruction is added (at comment line + iTagLine) if there are no
    current instructions.  The tag count of every current instruction is
    incremented, and self.cTotalTagged is bumped for each instruction
    receiving its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3341
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section (a list of lines) becomes one string consisting of
    its stripped lines joined by single spaces.  Empty sections are dropped.

    Returns a list of strings, one string per section.
    """
    return [' '.join(sLine.strip() for sLine in asSection)
            for asSection in aasSections
            if asSection];
3353
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens the sections into a single string.

    The stripped lines of each non-empty section are joined using sLineSep.
    sSectionSep is put in front of every non-empty section except one at
    index zero, so there is no trailing section separator.

    Returns the flattened string.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for iSection, asSection in enumerate(aasSections):
        if not asSection:
            continue;
        sJoined = sLineSep.join(sLine.strip() for sLine in asSection);
        asPieces.append(sSectionSep + sJoined if iSection > 0 else sJoined);
    return ''.join(asPieces);
3371
3372
3373
3374 ## @name Tag parsers
3375 ## @{
3376
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be non-empty, at most 180 characters (including the full
    stop that is appended when missing) and consist of a single sentence.
    An already set brief is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot that ends a sentence, i.e. one that is followed
    # by a space or is the last character.  It must be the last character.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3406
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is flattened
    into one string and appended to the instruction's description sections.

    Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
3421
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic (a valid C identifier
    string according to the original documentation).  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
3444
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It is validated against g_kdOpLocations
    and defaults to the location associated with the type in g_kdOpTypes.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It is
    validated against g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    already specified operand is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  The operand
    # list is padded with None entries in case a lower numbered tag is missing.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3494
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    All map names must be keys of g_dInstructionMaps and none of them may
    already be assigned to the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asMapNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asMapNames) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));

    sUnknown = next((sName for sName in asMapNames if sName not in g_dInstructionMaps), None);
    if sUnknown is not None:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, sUnknown, ', '.join(g_dInstructionMaps.keys()),));

    # Throw an error if any of the maps is assigned to the instruction already.
    oAssigned = next((oMap for oMap in oInstr.aoMaps if oMap.sName in asMapNames), None);
    if oAssigned is not None:
        return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oAssigned.sName));

    # Add the maps, complaining about (but otherwise ignoring) repeats in the input.
    for sName in asMapNames:
        oNewMap = g_dInstructionMaps[sName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sName));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
3529
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased; 'n/a' is stored as None, 'none' as is, and
    anything else must be a valid opcode byte (a two character value gets
    '0x' prepended first).  Values other than None must be in g_kdPrefixes.
    An already set prefix is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3567
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg  (TODO: mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.
    The 'reg' part is a single digit in the range 0 thru 7, matching the
    three bit wide ModR/M.reg field.  An already set opcode is not
    overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    sRegDigits = '01234567';
    fValid =   _isValidOpcodeByte(sOpcode) \
            or (len(sOpcode) == 2 and sOpcode.startswith('/')    and sOpcode[-1] in sRegDigits) \
            or (len(sOpcode) == 4 and sOpcode.startswith('11/')  and sOpcode[-1] in sRegDigits) \
            or (len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits);
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3597
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the first
    element of the corresponding table entry.  An already set sub-opcode is
    not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value, then translate it via the table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sRawValue,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Set it.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
3624
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.  The value is accepted if it is
    a key of g_kdEncodings, a key of g_dInstructionMaps, or a valid opcode
    byte.  An already set encoding is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    if      sNewEncoding not in g_kdEncodings \
        and sNewEncoding not in g_dInstructionMaps \
        and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Set it.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;
    return True;
3651
## Maps the EFlags tags to the name of the Instruction attribute holding the
#  flag list of that tag.
kdOpFlagToAttr = dict([
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
]);
3660
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of EFLAGS mnemonics (validated
    against g_kdEFlagsMnemonics), or the single word 'none' (any case) for an
    empty list.  The resulting list is stored in the instruction attribute
    that self.kdOpFlagToAttr associates with the tag, unless that attribute
    is set already.

    Returns True on success, False or the result of self.errorComment() on
    failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry with surrounding whitespace removed before giving up on it.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
3692
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    means no hints.  Valid hints are added to the instruction's dHints
    dictionary, duplicates are reported but otherwise ignored.

    Returns True on success, False if any of the hints is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never returns words with leading or
    #       trailing whitespace, so there is no need to retry stripped values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3726
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for the
    instruction.  The value must match self.oReDisEnum.  If more than one
    word is given, an error is reported and the first word is used.  An
    already set enum value is not overwritten.

    Returns True on success, False or the result of self.errorComment() on
    failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
        if len(asValues) == 0:
            return False;

    sNewDisEnum = asValues[0];
    if self.oReDisEnum.match(sNewDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Set it.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));
    oInstr.sDisEnum = sNewDisEnum;
    return True;
3755
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a name found in g_kdCpuNames.  If more than one word
    is given, an error is reported and the first word is used.  An existing,
    different minimum CPU value is not overwritten; that is reported as an
    error, but does not fail the tag.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it, but only if not set already.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3785
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key of g_kdCpuIdFlags; the single word 'none' (any case) means no flags.
    Valid flags are appended to the instruction's asCpuIds list, duplicates
    are reported but otherwise ignored.

    Returns True on success, False if any of the values is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never returns words with leading or
    #       trailing whitespace, so there is no need to retry stripped values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3819
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching self.oReGroupName
    is required.  An already set group is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    (sNewGroup,) = asGroups;
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Set it.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));
    oInstr.sGroup = sNewGroup;
    return True;
3845
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style, which must be a key of g_kdInvalidStyles, is
    required, and only one of these tags may be given per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));

    (sNewStyle,) = asStyles;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Set it.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # The tag itself tells whether the encoding is unused or invalid.
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
3883
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is parsed as one test.  A test is only added to the
    instruction if its selectors, inputs and outputs all parsed okay;
    problems are reported via self.errorComment().

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front (outputs first, then inputs,
        # then selectors), so each word is inserted at the front of the
        # current list to preserve the original order.
        for sWord in reversed(asWords):
            # Check for array switchers.  Note! Must compare by identity, as
            # the lists compare equal by value whenever they hold the same
            # words (e.g. when both are still empty).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands to the full condition expression.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # The first operator found in the item decides how it is split.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the default type of the field is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if all went well, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                         % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4028
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is checked against the number of tests the instruction has
    already; a mismatch is reported, but the test is parsed regardless.

    Returns the result of parseTagOpTest().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the test number, it follows '@optest' with or without brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));

    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4044
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.  The
    tag and its value are accepted without any processing.

    See also \@oponlytest.

    Returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4056
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid references are appended to the instruction's asCopyTests list;
    invalid and duplicate ones are reported and skipped.

    Returns True, unless no reference was given at all, in which case the
    result of self.errorComment() is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  The jobs
    # are executed after parsing all the input, so forward references work.
    asReferences = self.flattenAllSections(aasSections).split();
    if len(asReferences) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
4085
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Marks the current instruction as one of the few to generate tests for,
    which is handy when singling out one or two new instructions or tests.
    The instruction is added to g_aoOnlyTestInstructions (once).

    See also \@optestignore.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag must stand alone.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction unless it is already on the list.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);

    _ = iEndLine;
    return True;
4108
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is accepted, it must be a key of g_kdXcptTypes, and
    it may only be set once per instruction.  Any violation is reported via
    errorComment, whose result is then returned; True is returned on success.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Set it, refusing to silently replace an earlier @opxcpttype.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
4135
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name of the current instruction.
    Normally the name is picked up from the FNIEMOP_XXX macro invocation
    following the description, or generated from the mnemonic and operands.

    It is thought that setting it explicitly may be necessary when
    specifying instructions whose implementation doesn't follow immediately
    or which aren't implemented yet.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a function name.
    sFunction = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sFunction):
        # Only the first specification counts, later ones are errors.
        if oInstr.sFunction is None:
            oInstr.sFunction = sFunction;
            _ = iEndLine;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, oInstr.sFunction, sFunction,));

    return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                       % (sTag, sFunction, self.oReFunctionName.pattern));
4163
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name of the current instruction.
    Normally the name is picked up from the IEMOP_MNEMONIC macro invocation,
    or generated from the mnemonic and operands.

    It is thought that setting it explicitly may be necessary when
    specifying instructions whose implementation doesn't follow immediately
    or which aren't implemented yet.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a statistics name.
    sStats = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sStats):
        # Only the first specification counts, later ones are errors.
        if oInstr.sStats is None:
            oInstr.sStats = sStats;
            _ = iEndLine;
            return True;
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, oInstr.sStats, sStats,));

    return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                       % (sTag, sStats, self.oReStatsName.pattern));
4191
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Explicitly flushes the instructions specified so far by calling
    doneInstructions.  Any value following the tag is an error.
    """
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        _ = (sTag, iEndLine,);
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4204
4205 ## @}
4206
4207
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, an old style 'Opcode ...' first line is
    passed to parseCommentOldOpcode, and every '@tag' found is dispatched to
    the matching handler in self.dTagHandlers together with the sections of
    text (separated by empty lines) that follow it.

    Returns False if the comment holds nothing of interest, otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if sComment.find('Opcode') < 0 and sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, dropping leading asterisks and spaces,
    # then trim empty lines off both ends.  Leading lines that are dropped
    # advance self.iCommentLine.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    def dispatchTag(sTag, aasTagSections, iTagLine, iEndLine, fSuggest):
        """
        Processes one completed tag.

        Returns (cHandled, fIsDefault, sDefaultText): cHandled is 1 if a
        handler was called, fIsDefault is set for the untagged '@default'
        text which is then returned flattened in sDefaultText.
        """
        # Drop a trailing empty section (the current section is always last).
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            return (1, False, None);
        if sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            return (0, True, self.flattenAllSections(aasTagSections));
        elif fSuggest:
            if '@op' + sTag[1:] in self.dTagHandlers:
                self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
            elif sTag in ['@encoding', '@opencoding']:
                self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        return (0, False, None);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            # A new tag starts here, so finish off the previous one first.
            cHandled, fIsDefault, sText = dispatchTag(sCurTag, aasSections, iCurTagLine, iLine, True);
            cOpTags += cHandled;
            if fIsDefault:
                sFlatDefault = sText;

            asSplit      = sLine.split(None, 1);
            sCurTag      = asSplit[0].lower();
            asCurSection = [asSplit[1],] if len(asSplit) > 1 else [];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends a non-empty section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag (no "did you mean" suggestions for this one).
    #
    cHandled, fIsDefault, sText = dispatchTag(sCurTag, aasSections, iCurTagLine, iLine, False);
    cOpTags += cHandled;
    if fIsDefault:
        sFlatDefault = sText;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4308
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the invocation isn't
    closed within sInvocation, following lines are pulled in from
    self.asLines starting at self.iLine (self.iLine itself isn't changed).

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  If the name is invalid an error is logged
    and (len(sInvocation), None) is returned so the result can always be
    unpacked.  If the input ends before the closing parenthesis, an error
    is logged and the arguments collected so far are returned.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        # Report it, but keep the documented tuple shape for the callers.
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    chQuote  = None;
    while cDepth > 0:
        if off >= len(sInvocation):
            # Ran out of text, continue with the next line if there is one.
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote:
            # Inside a string or character literal: skip escaped characters
            # and look only for the closing quote.
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the outermost level delimit arguments.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
4359
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of sMacro is considered, and it counts as an
    invocation only if the next non-blank character is an opening
    parenthesis.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # Note: startswith() instead of indexing, so that nothing but whitespace
    #       following the name means 'not found' rather than an IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
4369
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
4375
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and stops at the
    first one found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    asHit = None;
    for sCandidate in asMacro:
        asHit = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asHit is not None:
            break;
    return asHit;
4385
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what the preceding @op*
    tags said about the last instruction in self.aoCurInstrs (one is added
    if there is none), and missing attributes (mnemonic, operands, encoding,
    statistics name, hints) are filled in from the macro.  All problems are
    reported via self.error and processing continues.

    sMacro is only used in messages, sAsm is currently unused, and
    asOperands is the list of a_OpN values.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        asFormWheres = g_kdIemForms[sForm][1];
        if asFormWheres is not None:
            if len(asFormWheres) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(oInstr.aoOperands) ));
            else:
                # A 'V' type operand needs a VEX or XOP form with a 'V' in
                # the operand part of its name.  Note: substring tests, as
                # str.find() returns -1 (true) on a miss and 0 (false) when
                # the form name starts with the prefix.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                fFormHasV     = 'V' in sForm.replace('VEX', '');
                for iOperand, sWhere in enumerate(asFormWheres):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (not fVexOrXopForm or not fFormHasV) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those given by the form must be fixed ones.
                if len(asFormWheres) < len(oInstr.aoOperands):
                    for iOperand in range(len(asFormWheres), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sPrefix, sParam, sHints in ( ('DISOPTYPE_', 'a_fDisHints', sDisHints),
                                     ('IEMOPHINT_', 'a_fIemHints', sIemHints), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
4525
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These lack the a_Stats and a_szMnemonic parameters of the EX variants,
    so both are derived from the lower case mnemonic and the operands before
    handing everything to workerIemOpMnemonicEx.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4535
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    sCode is the code snippet containing the statement,
    offBeginStatementInCodeStr the offset of the statement within it, and
    offBeginStatementInLine the offset handed to the new McBlock.

    Raises an error (self.raiseError) if there is no current function or if
    the previous block hasn't been ended yet.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the statement offset relative to the start of its line;
    # sCode may hold several lines when it comes from an expanded macro.
    # (rfind returns -1 when there is no preceding newline, giving start 0.)
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block and update the bookkeeping.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oMcBlock;
    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4564
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block, passes them to its
    complete() method together with the current line number and
    offEndStatementInLine, and then clears self.oCurMcBlock.

    Raises an error (self.raiseError) if there is no current block, or if
    IEM_MC_BEGIN isn't the first word on its line.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');
    oMcBlock = self.oCurMcBlock;

    #
    # Complete and discard the current block.
    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine <= oMcBlock.iBeginLine:
        sRaw = self.asLines[self.iLine - 1];

        # Cut off everything after the line holding the IEM_MC_END.
        offNewline = sRaw.find('\n', offEndStatementInLine);
        if offNewline > 0:
            sRaw = sRaw[:offNewline + 1];

        # Cut off everything before the line holding the IEM_MC_BEGIN, or
        # before the statement itself if it isn't first on that line.
        offLineStart = sRaw.rfind('\n', 0, oMcBlock.offBeginLine) + 1;
        sRaw = sRaw[offLineStart:];
        if not sRaw.strip().startswith('IEM_MC_BEGIN'):
            sRaw = sRaw[oMcBlock.offBeginLine - offLineStart:]

        asLines = [sPart + '\n' for sPart in sRaw.split('\n')];
    else:
        asLines = self.asLines[oMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

    oMcBlock.complete(self.iLine, offEndStatementInLine, asLines);
    self.oCurMcBlock = None;
    return True;
4606
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so asArgs is the argument list of such an invocation, where the
    0th entry is the macro name and the 1st is used as the function name.

    Returns True.
    """
    # Complete any existing function, it ends on the line before this one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Create the new function.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
4622
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    Recognizes decoder function definitions (FNIEMOP_*), the
    IEMOP_HLP_DONE_VEX_DECODING* helpers, the IEMOP_MNEMONIC* family and
    IEM_MC_BEGIN / IEM_MC_END, dispatching each to its worker.  offLine is
    added to the IEM_MC_* match offsets passed on to the workers.

    Returns True for function definitions and for code containing 'IEM_MC_',
    otherwise False.
    """
    # Macro invocations need an opening parenthesis somewhere past column 0.
    if sCode.find('(') <= 0:
        return False;

    # Look for instruction decoder function definitions. ASSUME single line.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );

        fStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            # Stubs have no body, so the instruction(s) are complete.
            self.doneInstructions(fEndOfFunction = True);
        return True;

    # Check for worker function definitions, so we can get a context for MC blocks.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        return True;

    # IEMOP_HLP_DONE_VEX_DECODING_*
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                     ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            # The L0 variants imply the vex_l_zero hint; complain if a
            # mnemonic macro was seen without it, then add it.
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue;
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None and len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0];
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1];
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,]
        #                     a_fDisHints, a_fIemHints), for N = 0..4.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
            if asArgs is not None:
                iHints = 6 + cOperands;
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints]);

        # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints),
        # for N = 0..4.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
            if asArgs is not None:
                iHints = 4 + cOperands;
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints]);

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode < 0:
        return False;
    for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
        offStatement = oMatch.start();
        if oMatch.group(1) == 'END':
            self.workerIemMcEnd(offLine + offStatement);
        else:
            self.workerIemMcBegin(sCode, offStatement, offLine + offStatement);
    return True;
4763
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The regular expression has one alternative per macro: function-like
    macros (asArgs is not None) match the name, optional whitespace and the
    opening parenthesis, whereas object-like ones match the name as a whole
    word.  With no macros, self.oReMacros is set to None.

    Returns True.
    """
    if self.dMacros:
        # Note! Raw strings, as '\s' and '\(' aren't valid string escapes.
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
4784
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are consumed (advancing self.iLine).  Only macros
    whose body contains IEM_MC_BEGIN are recorded in self.dMacros, after
    which the macro regex is recreated.

    Returns True, or the self.error result if the definition can't be parsed.
    """
    #
    # If using line continuation, just concat all the lines together,
    # preserving the newline character but not the escaping.
    #
    iLineStart = self.iLine;
    cLines     = len(self.asLines);
    while sRest.endswith('\\\n') and self.iLine < cLines:
        sRest = sRest[0:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;

    #
    # Use regex to split out the name, argument list and body.
    # If the function-like form fails, we assume it's a simple macro.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        asArgs = [sParam.strip() for sParam in oMatch.group(2).split(',')];
        sBody  = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody  = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    if sBody.find("IEM_MC_BEGIN") >= 0:
        if self.fDebugPreProc:
            self.debug('#define %s on line %u' % (sName, self.iLine,));
        self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
        return self.workerPreProcessRecreateMacroRegex();

    return True;
4834
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro isn't one we know, otherwise the result of
    recreating the macro regex after dropping it from self.dMacros.
    """
    # Quick comment strip: anything from the first slash on goes.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless we're tracking the macro.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
4853
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive.

    Only #define and #undef are of interest, they are passed to the
    respective worker (first regex group plus a newline) and the worker
    result is returned.  Returns False for anything else.
    """
    for oRegex, fnWorker in ( (self.oReHashDefine, self.workerPreProcessDefine),
                              (self.oReHashUndef,  self.workerPreProcessUndef), ):
        oMatch = oRegex.match(sLine);
        if oMatch:
            return fnWorker(oMatch.group(1) + '\n');
    return False;
4866
def expandMacros(self, sLine, oMatch):
    """
    Expands macros we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    oMatch is a self.oReMacros match on sLine; group 1 is the macro name,
    including the opening parenthesis for macros taking arguments.

    Returns sLine with the invocation replaced by the macro expansion.
    Raises an error (self.raiseError) if the argument list isn't closed on
    this line or the argument count doesn't match the definition.
    """
    #
    # Get our bearings.
    #
    offMatch      = oMatch.start();
    offAfterMatch = oMatch.end();
    sName         = oMatch.group(1);
    assert sName == sLine[offMatch : offAfterMatch];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro  = self.dMacros[sName]           # type: SimpleParser.Macro
    sBefore = sLine[:offMatch];

    #
    # Deal with simple macro invocations w/o parameters.
    #
    if not fWithArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sBefore + oMacro.expandMacro(self) + sLine[offAfterMatch:];

    #
    # Complicated macro with parameters.
    # Start by extracting the parameters. ASSUMES they are all on the same line!
    #
    asArgs      = [];
    cNesting    = 1;
    off         = offAfterMatch;
    offArgStart = off;
    while True:
        if off >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[off];
        if ch == ',':
            # Only commas at the outermost level separate arguments.
            if cNesting == 1:
                asArgs.append(sLine[offArgStart:off].strip());
                offArgStart = off + 1;
        elif ch == '(':
            cNesting += 1;
        elif ch == ')':
            cNesting -= 1;
            if cNesting == 0:
                asArgs.append(sLine[offArgStart:off].strip());
                break;
        off += 1;

    # Trick for empty parameter list: 'FOO()' yields a single empty argument.
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '':
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding (off is at the closing parenthesis).
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sBefore + oMacro.expandMacro(self, asArgs) + sLine[off + 1 :];
4925
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine to the end, one line at a time.  Known
    macros are expanded while in code state, C-style comments are collected
    into self.sComment and handed to parseComment() when they close, and code
    fragments are passed to checkCodeForMacro().

    Returns number of errors (the value returned by printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        # Note! From here on self.iLine is the one-based number of the current line.
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        # The expanded text replaces the line in self.asLines as well.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Take the slow path unless the first slash starts a C++ comment ('//')
            # while we're in code state.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                # Work thru the line, switching between code and multi-line
                # comment state as comment starts and ends are encountered.
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            # Comments closing less than 16 chars after the opening are skipped
                            # and thereby left in the code handed to checkCodeForMacro.
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            # Code up to the comment start, then enter multi-line comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # Comment ends on this line: complete it, go back to code
                            # state and have the comment parsed.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            # Comment continues on the next line.
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.  Only what precedes it is checked; nothing is
            # done when the comment starts in the first position.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif (    self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # End of file: complete any pending instructions and report the statistics.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5024
## The parsed content of IEMAllInstructionsCommonBodyMacros.h.
## Starts out as None and is set by __parseFileByName() the first time it is
## called; it is then handed to the SimpleParser created for each source file.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5027
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call this also reads and parses
    IEMAllInstructionsCommonBodyMacros.h (looked for next to sSrcFile first,
    then next to this script).  The resulting parser is kept in
    g_oParsedCommonBodyMacros and handed to the parser of every source file.

    Returns a (number of errors, SimpleParser instance) tuple.
    Raises Exception if a file cannot be opened or read.  ParserException is
    printed to stderr and re-raised.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstructionsCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstructionsCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstructionsCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to contribute macros, so any
        # instruction, stub, tag or MC block found in it is treated as an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5089
5090
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all instructions.

    Each reference is looked up in g_dAllInstructionsByStat first and, failing
    that, in g_dAllInstructionsByFunction.  The tests of every instruction
    found are appended to the aoTests of the requesting instruction.  It is an
    error if a reference matches nothing or matches the requesting instruction
    itself.

    Returns the number of errors.  The error messages are written to stderr.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        for sRef in (oTarget.asCopyTests or []):
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sRef, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sRef, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));
                continue;

            # Copy the tests over, refusing to copy from the target itself.
            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
5118
5119
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is replaced by an empty list on every
    instruction in g_aoAllInstructions that has tests and isn't in the list.
    When it is empty nothing is changed.

    Returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCur in g_aoAllInstructions:
        if oCur.aoTests and oCur not in g_aoOnlyTestInstructions:
            oCur.aoTests = [];
    return 0;
5131
## List of all main instruction files and their default maps.
## Each entry is a (file name, default map name) pair.  parseAll() parses all
## of them, while parseFiles() uses the table to look up the default map of a
## file by its name (case-insensitive).
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h', 'one', ),
    ( 'IEMAllInstructionsOneByte.cpp.h', 'one', ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f', ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h', 'vexmap1', ),
    ( 'IEMAllInstructionsVexMap2.cpp.h', 'vexmap2', ),
    ( 'IEMAllInstructionsVexMap3.cpp.h', 'vexmap3', ),
    ( 'IEMAllInstructions3DNow.cpp.h', '3dnow', ),
);
5144
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (file name, default map name) pairs.
    A file name without any directory part that doesn't exist relative to the
    current directory is looked for in the directory of this script instead.

    After all files are parsed, the test copying and only-test filtering is
    applied and the overall stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.
    # Note! The divisor is clamped to 1 (like SimpleParser.parse does) so that
    #       an empty instruction list doesn't cause a ZeroDivisionError.
    cTotalInstr = len(g_aoAllInstructions);
    cTotalStubs = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5175
5176
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is found by looking up its file name (path
    stripped, case-insensitive) in g_aasAllInstrFilesAndDefaultMap; the actual
    work is then done by __parseFilesWorker.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file isn't in the table.
    """
    aoWork = [];
    for sPath in asFiles:
        sBaseName   = os.path.split(sPath)[1].lower();
        sDefaultMap = next((sKnownMap for sKnownFile, sKnownMap in g_aasAllInstrFilesAndDefaultMap
                            if sKnownFile.lower() == sBaseName), None);
        if not sDefaultMap:
            raise Exception('Unable to classify file: %s' % (sPath,));
        aoWork.append((sPath, sDefaultMap));

    return __parseFilesWorker(aoWork);
5198
5199
def parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files, that is every file listed
    in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    return __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
5208
5209
5210#
5211# Generators (may perhaps move later).
5212#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP() macro invocation, or OPVEX() if the instruction has
    more than three operands, made up of: the format string, one parser index
    per operand slot, the opcode enum value, one OP_PARM_xxx per operand slot
    and finally the DISOPTYPE_xxx flags.

    Returns the entry as a single line with the columns padded to fixed offsets.
    """
    aoOperands = oInstr.aoOperands;

    # The OP macro has room for three operands, OPVEX for four.
    if len(aoOperands) > 3:
        sMacro       = 'OPVEX';
        cMaxOperands = 4;
        assert len(aoOperands) <= cMaxOperands;
    else:
        sMacro       = 'OP';
        cMaxOperands = 3;

    #
    # Format string: mnemonic, a space, then the operand formats separated by commas.
    #
    asOperandFmts = [];
    for oOperand in aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':                   ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();      ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';

    #
    # Decoders.  The encoding may claim the first parser slot(s); the operands
    # not covered by it get their own index below.
    #
    asParsers = [];
    sEncoding = oInstr.sEncoding;
    if sEncoding is None or sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding in [ 'prefix', ]:
        asParsers.extend(['0,'] * len(aoOperands));
    elif sEncoding == 'VEX.ModR/M':
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding == 'vex2':
        asParsers.append('IDX_ParseVex2b,');
    elif sEncoding == 'vex3':
        asParsers.append('IDX_ParseVex3b,');
    elif sEncoding in g_dInstructionMaps:
        asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad the
    # unused parser slots with zeros.
    for oOperand in aoOperands[len(asParsers):]:
        #if sIdx != 'IDX_UseModRM':
        asParsers.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)));

    #
    # Opcode and operands, the latter padded with OP_PARM_NONE.
    #
    assert oInstr.sDisEnum, str(oInstr);
    sOpcode  = oInstr.sDisEnum + ',';
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

    #
    # Flags: the DISOPTYPE_xxx defines of the hints (in sorted hint order)
    # OR'ed together, or zero if there aren't any.  This closes the macro.
    #
    asAllDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    asFlags      = [sDefine for sDefine in asAllDefines if sDefine.startswith('DISOPTYPE_')];
    sFlags       = (' | '.join(asFlags) or '0') + '),';

    #
    # Format the columns into a line.  Each column starts at its fixed offset,
    # or one space after the previous column if that ran past the offset.
    #
    asColumns   = [ sFormat, ] + asParsers + [ sOpcode, ] + asParams + [ sFlags, ];
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5337
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether the leading and trailing unused (None) entries of the table
    should be trimmed off, i.e. whether to emit a short table.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to output and cInstructions the index following the last
    one.  For a full table this is (0, len(aoTableOrdered), False).
    """
    _ = oMap; # Use this for overriding.
    cEntries = len(aoTableOrdered);

    # Find the end of the used part, i.e. skip the trailing None entries.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # Find the start of the used part, i.e. skip the leading None entries.
    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Go for the short version when the number of entries trimmed off is at
    # least a 30th of the table size (integer division, so tables with fewer
    # than 30 entries always qualify).
    # NOTE(review): this used to be described as "more than 30%"; the code
    #               has always tested against len // 30 - confirm the intent.
    cTrimmed = iFirst + (cEntries - iEnd);
    if cTrimmed < cEntries // 30:
        # Output the full table.
        return (0, cEntries, False);
    return (iFirst, iEnd, True);
5359
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and then writes a DISOPCODE table plus a
    DISOPMAPDESC range record to oDstFile for each instruction map whose name
    starts with 'vex'.

    Returns exit code: 0 on success, 1 if parseAll() failed (the details are
    printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;


    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by 'byte+pfx' are split up into one map per prefix.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCurMap: oCurMap.sEncoding + ''.join(oCurMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the extern declarations, but nothing in
    #       this function writes them out.
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte;   # One output row covers 16 opcode bytes.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append(      'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # Note where a short table starts when that isn't at the start of a row.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                # Start of a new row: separate it from the previous one and label it.
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row of invalid entries.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte are invalid.  Skip the
                        # remaining ones (this used to be a hardcoded 3, which is
                        # only right for four entries per byte).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): this passes the empty list itself on as if it
                    #               was an instruction - confirm it cannot happen.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An empty initializer wouldn't do, so give empty tables a dummy entry.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # Note! The extern declaration must name the same symbol as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
5488
# When run as a script: generate the disassembler tables (written to stdout by
# default) and use the generator's return value as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
5491
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette