VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/DevVGA-SVGA3d-dx-shader.cpp@ 95288

Last change on this file since 95288 was 95135, checked in by vboxsync, 2 years ago

Devices/Graphics: shader parser: bugref:9830

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 117.9 KB
Line 
1/* $Id: DevVGA-SVGA3d-dx-shader.cpp 95135 2022-05-30 11:46:42Z vboxsync $ */
2/** @file
3 * DevVMWare - VMWare SVGA device - VGPU10+ (DX) shader utilities.
4 */
5
6/*
7 * Copyright (C) 2020-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
23#include <VBox/AssertGuest.h>
24#include <VBox/log.h>
25
26#include <iprt/asm.h>
27#include <iprt/md5.h>
28#include <iprt/mem.h>
29#include <iprt/sort.h>
30#include <iprt/string.h>
31
32#include "DevVGA-SVGA3d-dx-shader.h"
33
34#ifdef RT_OS_WINDOWS
35#include <d3d11TokenizedProgramFormat.hpp>
36#else
37#define D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM 2
38#define D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE 3
39#endif
40
41/*
42 *
43 * DXBC shader binary format definitions.
44 *
45 */
46
47/* DXBC container header.
 * The fixed fields are followed by an array of blob offsets. aBlobOffset is
 * declared with a single element, the actual number of entries is cBlob. */
48typedef struct DXBCHeader
49{
50 uint32_t u32DXBC; /* Magic: 0x43425844 = 'D', 'X', 'B', 'C', see DXBC_MAGIC. */
51 uint8_t au8Hash[16]; /* Modified MD5 hash. See dxbcHash. */
52 uint32_t u32Version; /* Container version: 1 */
53 uint32_t cbTotal; /* Total size in bytes, including this header. */
54 uint32_t cBlob; /* Number of entries in the aBlobOffset array. */
55 uint32_t aBlobOffset[1]; /* Offsets of the blobs from the start of the DXBC header. */
56} DXBCHeader;
57
/* The 'D', 'X', 'B', 'C' characters packed into a 32-bit value, matches the u32DXBC field. */
58#define DXBC_MAGIC RT_MAKE_U32_FROM_U8('D', 'X', 'B', 'C')
59
60/* DXBC blob header. Each blob starts with this header, the blob data follows it. */
61typedef struct DXBCBlobHeader
62{
63 uint32_t u32BlobType; /* FourCC code identifying the blob: DXBC_BLOB_TYPE_* */
64 uint32_t cbBlob; /* Size of the blob data in bytes, excluding this header. 4 bytes aligned. */
65 /* Followed by the blob's data. */
66} DXBCBlobHeader;
67
68/* DXBC blob types: FourCC values for DXBCBlobHeader::u32BlobType. */
69#define DXBC_BLOB_TYPE_ISGN RT_MAKE_U32_FROM_U8('I', 'S', 'G', 'N')
70#define DXBC_BLOB_TYPE_OSGN RT_MAKE_U32_FROM_U8('O', 'S', 'G', 'N')
71#define DXBC_BLOB_TYPE_PCSG RT_MAKE_U32_FROM_U8('P', 'C', 'S', 'G')
72#define DXBC_BLOB_TYPE_SHDR RT_MAKE_U32_FROM_U8('S', 'H', 'D', 'R')
73/** @todo More blob types. */
74
75/* 'SHDR' blob data format.
 * The token stream consists of the program token, the token count and then
 * the remaining cToken - 2 tokens. au32Token is declared with a single element. */
76typedef struct DXBCBlobSHDR
77{
78 VGPU10ProgramToken programToken;
79 uint32_t cToken; /* Number of 32 bit tokens including programToken and cToken. */
80 uint32_t au32Token[1]; /* The remaining cToken - 2 tokens. */
81} DXBCBlobSHDR;
82
83/* Element of an input or output signature, see DXBCBlobIOSGN. */
84typedef struct DXBCBlobIOSGNElement
85{
86 uint32_t offElementName; /* Offset of the semantic's name relative to the start of the blob data. */
87 uint32_t idxSemantic; /* Semantic index. */
88 uint32_t enmSystemValue; /* SVGA3dDXSignatureSemanticName */
89 uint32_t enmComponentType; /* 1 - unsigned, 2 - integer, 3 - float. */
90 uint32_t idxRegister; /* Shader register index. Elements must be sorted by register index. */
91 union
92 {
93 struct
94 {
95 uint32_t mask : 8; /* Component mask. Lower 4 bits represent X, Y, Z, W channels. */
96 uint32_t mask2 : 8; /* Which components are used in the shader. */
97 uint32_t pad : 16; /* Fills the bitfield up to 32 bits. */
98 } m;
99 uint32_t mask; /* The bitfields above accessed as one 32-bit value. */
100 } u;
101} DXBCBlobIOSGNElement;
102
103/* 'ISGN' and 'OSGN' blob data format.
 * aElement is declared with a single element, the actual number is cElement. */
104typedef struct DXBCBlobIOSGN
105{
106 uint32_t cElement; /* Number of signature elements. */
107 uint32_t offElement; /* Offset of the first element from the start of the blob. Equal to 8. */
108 DXBCBlobIOSGNElement aElement[1]; /* Signature elements. Size is cElement. */
109 /* Followed by ASCIIZ semantic names. */
110} DXBCBlobIOSGN;
111
112
113/*
114 * VGPU10 shader parser definitions.
115 */
116
117/* Parsed info about one index of an operand (an operand has up to 3 of them, see VGPUOperand). */
118typedef struct VGPUOperandIndex
119{
120 uint32_t indexRepresentation; /* VGPU10_OPERAND_INDEX_REPRESENTATION */
121 uint64_t iOperandImmediate; /* Immediate index value. Needs up to a qword. */
122 struct VGPUOperand *pOperandRelative; /* Operand used as a relative index. For VGPU10_OPERAND_INDEX_*RELATIVE */
123} VGPUOperandIndex;
124
125/* Parsed info about an operand.
 * The bitfields hold values decoded from the operand token(s); paOperandToken
 * and cOperandToken describe where the operand is located in the input buffer. */
126typedef struct VGPUOperand
127{
128 uint32_t numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */
129 uint32_t selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
130 uint32_t mask : 4; /* 4-bits X, Y, Z, W mask for VGPU10_OPERAND_4_COMPONENT_MASK_MODE. */
131 uint32_t operandType : 8; /* VGPU10_OPERAND_TYPE */
132 uint32_t indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */
133 VGPUOperandIndex aOperandIndex[VGPU10_OPERAND_INDEX_3D]; /* Operand indices. Up to 3. */
134 uint32_t aImm[4]; /* Immediate values for VGPU10_OPERAND_TYPE_IMMEDIATE* */
135 uint32_t cOperandToken; /* Number of tokens in this operand. */
136 uint32_t const *paOperandToken; /* Pointer to operand tokens in the input buffer. */
137} VGPUOperand;
138
139/* Parsed info about an opcode (one instruction or declaration of the shader).
 * aValOperand stores the instruction operands together with the operands which
 * are used as relative indices; aIdxOperand tells where in aValOperand each of
 * the cOperand instruction operands is located. */
140typedef struct VGPUOpcode
141{
142 uint32_t cOpcodeToken; /* Number of tokens for this operation. */
143 uint32_t opcodeType; /* VGPU10_OPCODE_* */
144 uint32_t opcodeSubtype; /* For example VGPU10_VMWARE_OPCODE_* */
145 uint32_t semanticName; /* SVGA3dDXSignatureSemanticName for system value declarations. */
146 uint32_t cOperand; /* Number of operands for this instruction. */
147 uint32_t aIdxOperand[8]; /* Indices of the instruction operands in the aValOperand array. */
148 /* 8 should be enough for everyone. */
149 VGPUOperand aValOperand[16]; /* Operands including VGPU10_OPERAND_INDEX_*RELATIVE if they are used: */
150 /* Operand1, VGPU10_OPERAND_INDEX_*RELATIVE for Operand1, ... */
151 /* ... */
152 /* OperandN, VGPU10_OPERAND_INDEX_*RELATIVE for OperandN, ... */
153 /* 16 probably should be enough for everyone. */
154 uint32_t const *paOpcodeToken; /* Pointer to opcode tokens in the input buffer. */
155} VGPUOpcode;
156
/* Static per-opcode information, the element type of the g_aOpcodeInfo table. */
157typedef struct VGPUOpcodeInfo
158{
159 uint32_t cOperand; /* Number of operands for this opcode. */
160} VGPUOpcodeInfo;
161
/*
 * Operand count of each VGPU10 opcode. There is one entry per opcode, in the
 * order given by the comment of each entry. UINT32_MAX marks the opcodes which
 * are commented as "special opcode".
 * NOTE(review): presumably indexed by the VGPU10_OPCODE_* value - confirm against the parser.
 */
162static VGPUOpcodeInfo const g_aOpcodeInfo[] =
163{
164 { 3 }, /* VGPU10_OPCODE_ADD */
165 { 3 }, /* VGPU10_OPCODE_AND */
166 { 0 }, /* VGPU10_OPCODE_BREAK */
167 { 1 }, /* VGPU10_OPCODE_BREAKC */
168 { 1 }, /* VGPU10_OPCODE_CALL */
169 { 2 }, /* VGPU10_OPCODE_CALLC */
170 { 1 }, /* VGPU10_OPCODE_CASE */
171 { 0 }, /* VGPU10_OPCODE_CONTINUE */
172 { 1 }, /* VGPU10_OPCODE_CONTINUEC */
173 { 0 }, /* VGPU10_OPCODE_CUT */
174 { 0 }, /* VGPU10_OPCODE_DEFAULT */
175 { 2 }, /* VGPU10_OPCODE_DERIV_RTX */
176 { 2 }, /* VGPU10_OPCODE_DERIV_RTY */
177 { 1 }, /* VGPU10_OPCODE_DISCARD */
178 { 3 }, /* VGPU10_OPCODE_DIV */
179 { 3 }, /* VGPU10_OPCODE_DP2 */
180 { 3 }, /* VGPU10_OPCODE_DP3 */
181 { 3 }, /* VGPU10_OPCODE_DP4 */
182 { 0 }, /* VGPU10_OPCODE_ELSE */
183 { 0 }, /* VGPU10_OPCODE_EMIT */
184 { 0 }, /* VGPU10_OPCODE_EMITTHENCUT */
185 { 0 }, /* VGPU10_OPCODE_ENDIF */
186 { 0 }, /* VGPU10_OPCODE_ENDLOOP */
187 { 0 }, /* VGPU10_OPCODE_ENDSWITCH */
188 { 3 }, /* VGPU10_OPCODE_EQ */
189 { 2 }, /* VGPU10_OPCODE_EXP */
190 { 2 }, /* VGPU10_OPCODE_FRC */
191 { 2 }, /* VGPU10_OPCODE_FTOI */
192 { 2 }, /* VGPU10_OPCODE_FTOU */
193 { 3 }, /* VGPU10_OPCODE_GE */
194 { 3 }, /* VGPU10_OPCODE_IADD */
195 { 1 }, /* VGPU10_OPCODE_IF */
196 { 3 }, /* VGPU10_OPCODE_IEQ */
197 { 3 }, /* VGPU10_OPCODE_IGE */
198 { 3 }, /* VGPU10_OPCODE_ILT */
199 { 4 }, /* VGPU10_OPCODE_IMAD */
200 { 3 }, /* VGPU10_OPCODE_IMAX */
201 { 3 }, /* VGPU10_OPCODE_IMIN */
202 { 4 }, /* VGPU10_OPCODE_IMUL */
203 { 3 }, /* VGPU10_OPCODE_INE */
204 { 2 }, /* VGPU10_OPCODE_INEG */
205 { 3 }, /* VGPU10_OPCODE_ISHL */
206 { 3 }, /* VGPU10_OPCODE_ISHR */
207 { 2 }, /* VGPU10_OPCODE_ITOF */
208 { 1 }, /* VGPU10_OPCODE_LABEL */
209 { 3 }, /* VGPU10_OPCODE_LD */
210 { 4 }, /* VGPU10_OPCODE_LD_MS */
211 { 2 }, /* VGPU10_OPCODE_LOG */
212 { 0 }, /* VGPU10_OPCODE_LOOP */
213 { 3 }, /* VGPU10_OPCODE_LT */
214 { 4 }, /* VGPU10_OPCODE_MAD */
215 { 3 }, /* VGPU10_OPCODE_MIN */
216 { 3 }, /* VGPU10_OPCODE_MAX */
217 { UINT32_MAX }, /* VGPU10_OPCODE_CUSTOMDATA: special opcode */
218 { 2 }, /* VGPU10_OPCODE_MOV */
219 { 4 }, /* VGPU10_OPCODE_MOVC */
220 { 3 }, /* VGPU10_OPCODE_MUL */
221 { 3 }, /* VGPU10_OPCODE_NE */
222 { 0 }, /* VGPU10_OPCODE_NOP */
223 { 2 }, /* VGPU10_OPCODE_NOT */
224 { 3 }, /* VGPU10_OPCODE_OR */
225 { 3 }, /* VGPU10_OPCODE_RESINFO */
226 { 0 }, /* VGPU10_OPCODE_RET */
227 { 1 }, /* VGPU10_OPCODE_RETC */
228 { 2 }, /* VGPU10_OPCODE_ROUND_NE */
229 { 2 }, /* VGPU10_OPCODE_ROUND_NI */
230 { 2 }, /* VGPU10_OPCODE_ROUND_PI */
231 { 2 }, /* VGPU10_OPCODE_ROUND_Z */
232 { 2 }, /* VGPU10_OPCODE_RSQ */
233 { 4 }, /* VGPU10_OPCODE_SAMPLE */
234 { 5 }, /* VGPU10_OPCODE_SAMPLE_C */
235 { 5 }, /* VGPU10_OPCODE_SAMPLE_C_LZ */
236 { 5 }, /* VGPU10_OPCODE_SAMPLE_L */
237 { 6 }, /* VGPU10_OPCODE_SAMPLE_D */
238 { 5 }, /* VGPU10_OPCODE_SAMPLE_B */
239 { 2 }, /* VGPU10_OPCODE_SQRT */
240 { 1 }, /* VGPU10_OPCODE_SWITCH */
241 { 3 }, /* VGPU10_OPCODE_SINCOS */
242 { 4 }, /* VGPU10_OPCODE_UDIV */
243 { 3 }, /* VGPU10_OPCODE_ULT */
244 { 3 }, /* VGPU10_OPCODE_UGE */
245 { 4 }, /* VGPU10_OPCODE_UMUL */
246 { 4 }, /* VGPU10_OPCODE_UMAD */
247 { 3 }, /* VGPU10_OPCODE_UMAX */
248 { 3 }, /* VGPU10_OPCODE_UMIN */
249 { 3 }, /* VGPU10_OPCODE_USHR */
250 { 2 }, /* VGPU10_OPCODE_UTOF */
251 { 3 }, /* VGPU10_OPCODE_XOR */
252 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE */
253 { 1 }, /* VGPU10_OPCODE_DCL_CONSTANT_BUFFER */
254 { 1 }, /* VGPU10_OPCODE_DCL_SAMPLER */
255 { 1 }, /* VGPU10_OPCODE_DCL_INDEX_RANGE */
256 { 0 }, /* VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY */
257 { 0 }, /* VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE */
258 { 0 }, /* VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT */
259 { 1 }, /* VGPU10_OPCODE_DCL_INPUT */
260 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SGV */
261 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SIV */
262 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS */
263 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SGV */
264 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SIV */
265 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT */
266 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SGV */
267 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SIV */
268 { 0 }, /* VGPU10_OPCODE_DCL_TEMPS */
269 { 0 }, /* VGPU10_OPCODE_DCL_INDEXABLE_TEMP */
270 { 0 }, /* VGPU10_OPCODE_DCL_GLOBAL_FLAGS */
271 { UINT32_MAX }, /* VGPU10_OPCODE_VMWARE: special opcode */
272 { 4 }, /* VGPU10_OPCODE_LOD */
273 { 4 }, /* VGPU10_OPCODE_GATHER4 */
274 { 3 }, /* VGPU10_OPCODE_SAMPLE_POS */
275 { 2 }, /* VGPU10_OPCODE_SAMPLE_INFO */
276 { UINT32_MAX }, /* VGPU10_OPCODE_RESERVED1: special opcode */
277 { 0 }, /* VGPU10_OPCODE_HS_DECLS */
278 { 0 }, /* VGPU10_OPCODE_HS_CONTROL_POINT_PHASE */
279 { 0 }, /* VGPU10_OPCODE_HS_FORK_PHASE */
280 { 0 }, /* VGPU10_OPCODE_HS_JOIN_PHASE */
281 { 1 }, /* VGPU10_OPCODE_EMIT_STREAM */
282 { 1 }, /* VGPU10_OPCODE_CUT_STREAM */
283 { 1 }, /* VGPU10_OPCODE_EMITTHENCUT_STREAM */
284 { 1 }, /* VGPU10_OPCODE_INTERFACE_CALL */
285 { 2 }, /* VGPU10_OPCODE_BUFINFO */
286 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_COARSE */
287 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_FINE */
288 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_COARSE */
289 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_FINE */
290 { 5 }, /* VGPU10_OPCODE_GATHER4_C */
291 { 5 }, /* VGPU10_OPCODE_GATHER4_PO */
292 { 6 }, /* VGPU10_OPCODE_GATHER4_PO_C */
293 { 2 }, /* VGPU10_OPCODE_RCP */
294 { 2 }, /* VGPU10_OPCODE_F32TOF16 */
295 { 2 }, /* VGPU10_OPCODE_F16TOF32 */
296 { 4 }, /* VGPU10_OPCODE_UADDC */
297 { 4 }, /* VGPU10_OPCODE_USUBB */
298 { 2 }, /* VGPU10_OPCODE_COUNTBITS */
299 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_HI */
300 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_LO */
301 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_SHI */
302 { 4 }, /* VGPU10_OPCODE_UBFE */
303 { 4 }, /* VGPU10_OPCODE_IBFE */
304 { 5 }, /* VGPU10_OPCODE_BFI */
305 { 2 }, /* VGPU10_OPCODE_BFREV */
306 { 5 }, /* VGPU10_OPCODE_SWAPC */
307 { 1 }, /* VGPU10_OPCODE_DCL_STREAM */
308 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_BODY */
309 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_TABLE */
310 { 0 }, /* VGPU10_OPCODE_DCL_INTERFACE */
311 { 0 }, /* VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT */
312 { 0 }, /* VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT */
313 { 0 }, /* VGPU10_OPCODE_DCL_TESS_DOMAIN */
314 { 0 }, /* VGPU10_OPCODE_DCL_TESS_PARTITIONING */
315 { 0 }, /* VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE */
316 { 0 }, /* VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR */
317 { 0 }, /* VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
318 { 0 }, /* VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
319 { 0 }, /* VGPU10_OPCODE_DCL_THREAD_GROUP */
320 { 1 }, /* VGPU10_OPCODE_DCL_UAV_TYPED */
321 { 1 }, /* VGPU10_OPCODE_DCL_UAV_RAW */
322 { 1 }, /* VGPU10_OPCODE_DCL_UAV_STRUCTURED */
323 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_RAW */
324 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_STRUCTURED */
325 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_RAW */
326 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED */
327 { 3 }, /* VGPU10_OPCODE_LD_UAV_TYPED */
328 { 3 }, /* VGPU10_OPCODE_STORE_UAV_TYPED */
329 { 3 }, /* VGPU10_OPCODE_LD_RAW */
330 { 3 }, /* VGPU10_OPCODE_STORE_RAW */
331 { 4 }, /* VGPU10_OPCODE_LD_STRUCTURED */
332 { 4 }, /* VGPU10_OPCODE_STORE_STRUCTURED */
333 { 3 }, /* VGPU10_OPCODE_ATOMIC_AND */
334 { 3 }, /* VGPU10_OPCODE_ATOMIC_OR */
335 { 3 }, /* VGPU10_OPCODE_ATOMIC_XOR */
336 { 4 }, /* VGPU10_OPCODE_ATOMIC_CMP_STORE */
337 { 3 }, /* VGPU10_OPCODE_ATOMIC_IADD */
338 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMAX */
339 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMIN */
340 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMAX */
341 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMIN */
342 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_ALLOC */
343 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_CONSUME */
344 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IADD */
345 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_AND */
346 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_OR */
347 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_XOR */
348 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_EXCH */
349 { 5 }, /* VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH */
350 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMAX */
351 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMIN */
352 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMAX */
353 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMIN */
354 { 0 }, /* VGPU10_OPCODE_SYNC */
355 { 3 }, /* VGPU10_OPCODE_DADD */
356 { 3 }, /* VGPU10_OPCODE_DMAX */
357 { 3 }, /* VGPU10_OPCODE_DMIN */
358 { 3 }, /* VGPU10_OPCODE_DMUL */
359 { 3 }, /* VGPU10_OPCODE_DEQ */
360 { 3 }, /* VGPU10_OPCODE_DGE */
361 { 3 }, /* VGPU10_OPCODE_DLT */
362 { 3 }, /* VGPU10_OPCODE_DNE */
363 { 2 }, /* VGPU10_OPCODE_DMOV */
364 { 4 }, /* VGPU10_OPCODE_DMOVC */
365 { 2 }, /* VGPU10_OPCODE_DTOF */
366 { 2 }, /* VGPU10_OPCODE_FTOD */
367 { 3 }, /* VGPU10_OPCODE_EVAL_SNAPPED */
368 { 3 }, /* VGPU10_OPCODE_EVAL_SAMPLE_INDEX */
369 { 2 }, /* VGPU10_OPCODE_EVAL_CENTROID */
370 { 0 }, /* VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT */
371 { 0 }, /* VGPU10_OPCODE_ABORT */
372 { 0 }, /* VGPU10_OPCODE_DEBUG_BREAK */
373 { 0 }, /* VGPU10_OPCODE_RESERVED0 */
374 { 3 }, /* VGPU10_OPCODE_DDIV */
375 { 4 }, /* VGPU10_OPCODE_DFMA */
376 { 2 }, /* VGPU10_OPCODE_DRCP */
377 { 4 }, /* VGPU10_OPCODE_MSAD */
378 { 2 }, /* VGPU10_OPCODE_DTOI */
379 { 2 }, /* VGPU10_OPCODE_DTOU */
380 { 2 }, /* VGPU10_OPCODE_ITOD */
381 { 2 }, /* VGPU10_OPCODE_UTOD */
382};
/* Compile time check that the table has exactly VGPU10_NUM_OPCODES entries. */
383AssertCompile(RT_ELEMENTS(g_aOpcodeInfo) == VGPU10_NUM_OPCODES);
384
385#ifdef LOG_ENABLED
386/*
387 *
388 * Helpers to translate a VGPU10 shader constant to a string.
389 *
390 */
391
/* Expands to a switch case label for 'idx' which returns the identifier
 * itself as a string literal (the argument is stringified, not its value). */
392#define SVGA_CASE_ID2STR(idx) case idx: return #idx
393
394static const char *dxbcOpcodeToString(uint32_t opcodeType)
395{
396 VGPU10_OPCODE_TYPE enm = (VGPU10_OPCODE_TYPE)opcodeType;
397 switch (enm)
398 {
399 SVGA_CASE_ID2STR(VGPU10_OPCODE_ADD);
400 SVGA_CASE_ID2STR(VGPU10_OPCODE_AND);
401 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAK);
402 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAKC);
403 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALL);
404 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALLC);
405 SVGA_CASE_ID2STR(VGPU10_OPCODE_CASE);
406 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUE);
407 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUEC);
408 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT);
409 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEFAULT);
410 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX);
411 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY);
412 SVGA_CASE_ID2STR(VGPU10_OPCODE_DISCARD);
413 SVGA_CASE_ID2STR(VGPU10_OPCODE_DIV);
414 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP2);
415 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP3);
416 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP4);
417 SVGA_CASE_ID2STR(VGPU10_OPCODE_ELSE);
418 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT);
419 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT);
420 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDIF);
421 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDLOOP);
422 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDSWITCH);
423 SVGA_CASE_ID2STR(VGPU10_OPCODE_EQ);
424 SVGA_CASE_ID2STR(VGPU10_OPCODE_EXP);
425 SVGA_CASE_ID2STR(VGPU10_OPCODE_FRC);
426 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOI);
427 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOU);
428 SVGA_CASE_ID2STR(VGPU10_OPCODE_GE);
429 SVGA_CASE_ID2STR(VGPU10_OPCODE_IADD);
430 SVGA_CASE_ID2STR(VGPU10_OPCODE_IF);
431 SVGA_CASE_ID2STR(VGPU10_OPCODE_IEQ);
432 SVGA_CASE_ID2STR(VGPU10_OPCODE_IGE);
433 SVGA_CASE_ID2STR(VGPU10_OPCODE_ILT);
434 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAD);
435 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAX);
436 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMIN);
437 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMUL);
438 SVGA_CASE_ID2STR(VGPU10_OPCODE_INE);
439 SVGA_CASE_ID2STR(VGPU10_OPCODE_INEG);
440 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHL);
441 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHR);
442 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOF);
443 SVGA_CASE_ID2STR(VGPU10_OPCODE_LABEL);
444 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD);
445 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_MS);
446 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOG);
447 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOOP);
448 SVGA_CASE_ID2STR(VGPU10_OPCODE_LT);
449 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAD);
450 SVGA_CASE_ID2STR(VGPU10_OPCODE_MIN);
451 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAX);
452 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUSTOMDATA);
453 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOV);
454 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOVC);
455 SVGA_CASE_ID2STR(VGPU10_OPCODE_MUL);
456 SVGA_CASE_ID2STR(VGPU10_OPCODE_NE);
457 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOP);
458 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOT);
459 SVGA_CASE_ID2STR(VGPU10_OPCODE_OR);
460 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESINFO);
461 SVGA_CASE_ID2STR(VGPU10_OPCODE_RET);
462 SVGA_CASE_ID2STR(VGPU10_OPCODE_RETC);
463 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NE);
464 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NI);
465 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_PI);
466 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_Z);
467 SVGA_CASE_ID2STR(VGPU10_OPCODE_RSQ);
468 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE);
469 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C);
470 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C_LZ);
471 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_L);
472 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_D);
473 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_B);
474 SVGA_CASE_ID2STR(VGPU10_OPCODE_SQRT);
475 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWITCH);
476 SVGA_CASE_ID2STR(VGPU10_OPCODE_SINCOS);
477 SVGA_CASE_ID2STR(VGPU10_OPCODE_UDIV);
478 SVGA_CASE_ID2STR(VGPU10_OPCODE_ULT);
479 SVGA_CASE_ID2STR(VGPU10_OPCODE_UGE);
480 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMUL);
481 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAD);
482 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAX);
483 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMIN);
484 SVGA_CASE_ID2STR(VGPU10_OPCODE_USHR);
485 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOF);
486 SVGA_CASE_ID2STR(VGPU10_OPCODE_XOR);
487 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE);
488 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_CONSTANT_BUFFER);
489 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_SAMPLER);
490 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEX_RANGE);
491 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY);
492 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE);
493 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT);
494 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT);
495 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SGV);
496 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SIV);
497 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS);
498 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SGV);
499 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SIV);
500 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT);
501 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SGV);
502 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SIV);
503 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TEMPS);
504 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEXABLE_TEMP);
505 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GLOBAL_FLAGS);
506 SVGA_CASE_ID2STR(VGPU10_OPCODE_VMWARE);
507 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOD);
508 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4);
509 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_POS);
510 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_INFO);
511 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED1);
512 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_DECLS);
513 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_CONTROL_POINT_PHASE);
514 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_FORK_PHASE);
515 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_JOIN_PHASE);
516 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT_STREAM);
517 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT_STREAM);
518 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT_STREAM);
519 SVGA_CASE_ID2STR(VGPU10_OPCODE_INTERFACE_CALL);
520 SVGA_CASE_ID2STR(VGPU10_OPCODE_BUFINFO);
521 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_COARSE);
522 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_FINE);
523 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_COARSE);
524 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_FINE);
525 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_C);
526 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO);
527 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO_C);
528 SVGA_CASE_ID2STR(VGPU10_OPCODE_RCP);
529 SVGA_CASE_ID2STR(VGPU10_OPCODE_F32TOF16);
530 SVGA_CASE_ID2STR(VGPU10_OPCODE_F16TOF32);
531 SVGA_CASE_ID2STR(VGPU10_OPCODE_UADDC);
532 SVGA_CASE_ID2STR(VGPU10_OPCODE_USUBB);
533 SVGA_CASE_ID2STR(VGPU10_OPCODE_COUNTBITS);
534 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_HI);
535 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_LO);
536 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_SHI);
537 SVGA_CASE_ID2STR(VGPU10_OPCODE_UBFE);
538 SVGA_CASE_ID2STR(VGPU10_OPCODE_IBFE);
539 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFI);
540 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFREV);
541 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWAPC);
542 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_STREAM);
543 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_BODY);
544 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_TABLE);
545 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INTERFACE);
546 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT);
547 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT);
548 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_DOMAIN);
549 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_PARTITIONING);
550 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE);
551 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR);
552 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT);
553 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT);
554 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_THREAD_GROUP);
555 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_TYPED);
556 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_RAW);
557 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_STRUCTURED);
558 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_RAW);
559 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_STRUCTURED);
560 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_RAW);
561 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED);
562 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_UAV_TYPED);
563 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_UAV_TYPED);
564 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_RAW);
565 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_RAW);
566 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_STRUCTURED);
567 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_STRUCTURED);
568 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_AND);
569 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_OR);
570 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_XOR);
571 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_CMP_STORE);
572 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IADD);
573 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMAX);
574 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMIN);
575 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMAX);
576 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMIN);
577 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_ALLOC);
578 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CONSUME);
579 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IADD);
580 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_AND);
581 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_OR);
582 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_XOR);
583 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_EXCH);
584 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
585 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMAX);
586 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMIN);
587 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMAX);
588 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMIN);
589 SVGA_CASE_ID2STR(VGPU10_OPCODE_SYNC);
590 SVGA_CASE_ID2STR(VGPU10_OPCODE_DADD);
591 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMAX);
592 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMIN);
593 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMUL);
594 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEQ);
595 SVGA_CASE_ID2STR(VGPU10_OPCODE_DGE);
596 SVGA_CASE_ID2STR(VGPU10_OPCODE_DLT);
597 SVGA_CASE_ID2STR(VGPU10_OPCODE_DNE);
598 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOV);
599 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOVC);
600 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOF);
601 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOD);
602 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SNAPPED);
603 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SAMPLE_INDEX);
604 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_CENTROID);
605 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT);
606 SVGA_CASE_ID2STR(VGPU10_OPCODE_ABORT);
607 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEBUG_BREAK);
608 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED0);
609 SVGA_CASE_ID2STR(VGPU10_OPCODE_DDIV);
610 SVGA_CASE_ID2STR(VGPU10_OPCODE_DFMA);
611 SVGA_CASE_ID2STR(VGPU10_OPCODE_DRCP);
612 SVGA_CASE_ID2STR(VGPU10_OPCODE_MSAD);
613 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOI);
614 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOU);
615 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOD);
616 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOD);
617 SVGA_CASE_ID2STR(VGPU10_NUM_OPCODES);
618 }
619 return NULL;
620}
621
622
623static const char *dxbcShaderTypeToString(uint32_t value)
624{
625 VGPU10_PROGRAM_TYPE enm = (VGPU10_PROGRAM_TYPE)value;
626 switch (enm)
627 {
628 SVGA_CASE_ID2STR(VGPU10_PIXEL_SHADER);
629 SVGA_CASE_ID2STR(VGPU10_VERTEX_SHADER);
630 SVGA_CASE_ID2STR(VGPU10_GEOMETRY_SHADER);
631 SVGA_CASE_ID2STR(VGPU10_HULL_SHADER);
632 SVGA_CASE_ID2STR(VGPU10_DOMAIN_SHADER);
633 SVGA_CASE_ID2STR(VGPU10_COMPUTE_SHADER);
634 }
635 return NULL;
636}
637
638
639static const char *dxbcCustomDataClassToString(uint32_t value)
640{
641 VGPU10_CUSTOMDATA_CLASS enm = (VGPU10_CUSTOMDATA_CLASS)value;
642 switch (enm)
643 {
644 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_COMMENT);
645 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DEBUGINFO);
646 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_OPAQUE);
647 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER);
648 }
649 return NULL;
650}
651
652
653static const char *dxbcSystemNameToString(uint32_t value)
654{
655 VGPU10_SYSTEM_NAME enm = (VGPU10_SYSTEM_NAME)value;
656 switch (enm)
657 {
658 SVGA_CASE_ID2STR(VGPU10_NAME_UNDEFINED);
659 SVGA_CASE_ID2STR(VGPU10_NAME_POSITION);
660 SVGA_CASE_ID2STR(VGPU10_NAME_CLIP_DISTANCE);
661 SVGA_CASE_ID2STR(VGPU10_NAME_CULL_DISTANCE);
662 SVGA_CASE_ID2STR(VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX);
663 SVGA_CASE_ID2STR(VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
664 SVGA_CASE_ID2STR(VGPU10_NAME_VERTEX_ID);
665 SVGA_CASE_ID2STR(VGPU10_NAME_PRIMITIVE_ID);
666 SVGA_CASE_ID2STR(VGPU10_NAME_INSTANCE_ID);
667 SVGA_CASE_ID2STR(VGPU10_NAME_IS_FRONT_FACE);
668 SVGA_CASE_ID2STR(VGPU10_NAME_SAMPLE_INDEX);
669 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR);
670 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR);
671 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR);
672 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR);
673 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
674 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
675 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR);
676 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR);
677 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR);
678 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
679 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
680 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
681 }
682 return NULL;
683}
684
685
686static const char *dxbcOperandTypeToString(uint32_t value)
687{
688 VGPU10_OPERAND_TYPE enm = (VGPU10_OPERAND_TYPE)value;
689 switch (enm)
690 {
691 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_TEMP);
692 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT);
693 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT);
694 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INDEXABLE_TEMP);
695 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE32);
696 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE64);
697 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_SAMPLER);
698 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RESOURCE);
699 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CONSTANT_BUFFER);
700 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER);
701 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_LABEL);
702 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
703 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH);
704 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_NULL);
705 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RASTERIZER);
706 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK);
707 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_STREAM);
708 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_BODY);
709 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_TABLE);
710 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INTERFACE);
711 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_INPUT);
712 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_OUTPUT);
713 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID);
714 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID);
715 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID);
716 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT);
717 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT);
718 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
719 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT);
720 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THIS_POINTER);
721 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_UAV);
722 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
723 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID);
724 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID);
725 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
726 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK);
727 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED);
728 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID);
729 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL);
730 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL);
731 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CYCLE_COUNTER);
732 SVGA_CASE_ID2STR(VGPU10_NUM_OPERANDS);
733 }
734 return NULL;
735}
736
737
738static const char *dxbcOperandNumComponentsToString(uint32_t value)
739{
740 VGPU10_OPERAND_NUM_COMPONENTS enm = (VGPU10_OPERAND_NUM_COMPONENTS)value;
741 switch (enm)
742 {
743 SVGA_CASE_ID2STR(VGPU10_OPERAND_0_COMPONENT);
744 SVGA_CASE_ID2STR(VGPU10_OPERAND_1_COMPONENT);
745 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT);
746 SVGA_CASE_ID2STR(VGPU10_OPERAND_N_COMPONENT);
747 }
748 return NULL;
749}
750
751
752static const char *dxbcOperandComponentModeToString(uint32_t value)
753{
754 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE enm = (VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE)value;
755 switch (enm)
756 {
757 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
758 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE);
759 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE);
760 }
761 return NULL;
762}
763
764
765static const char *dxbcOperandComponentNameToString(uint32_t value)
766{
767 VGPU10_COMPONENT_NAME enm = (VGPU10_COMPONENT_NAME)value;
768 switch (enm)
769 {
770 SVGA_CASE_ID2STR(VGPU10_COMPONENT_X);
771 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Y);
772 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Z);
773 SVGA_CASE_ID2STR(VGPU10_COMPONENT_W);
774 }
775 return NULL;
776}
777
778
779static const char *dxbcOperandIndexDimensionToString(uint32_t value)
780{
781 VGPU10_OPERAND_INDEX_DIMENSION enm = (VGPU10_OPERAND_INDEX_DIMENSION)value;
782 switch (enm)
783 {
784 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_0D);
785 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_1D);
786 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_2D);
787 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_3D);
788 }
789 return NULL;
790}
791
792
793static const char *dxbcOperandIndexRepresentationToString(uint32_t value)
794{
795 VGPU10_OPERAND_INDEX_REPRESENTATION enm = (VGPU10_OPERAND_INDEX_REPRESENTATION)value;
796 switch (enm)
797 {
798 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32);
799 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64);
800 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_RELATIVE);
801 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
802 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE);
803 }
804 return NULL;
805}
806
807
808static const char *dxbcInterpolationModeToString(uint32_t value)
809{
810 VGPU10_INTERPOLATION_MODE enm = (VGPU10_INTERPOLATION_MODE)value;
811 switch (enm)
812 {
813 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_UNDEFINED);
814 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_CONSTANT);
815 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR);
816 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_CENTROID);
817 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE);
818 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
819 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_SAMPLE);
820 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
821 }
822 return NULL;
823}
824
825
826static const char *dxbcResourceDimensionToString(uint32_t value)
827{
828 VGPU10_RESOURCE_DIMENSION enm = (VGPU10_RESOURCE_DIMENSION)value;
829 switch (enm)
830 {
831 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_UNKNOWN);
832 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_BUFFER);
833 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1D);
834 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2D);
835 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS);
836 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE3D);
837 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
838 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY);
839 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY);
840 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY);
841 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
842 }
843 return NULL;
844}
845
846
847static const char *dxbcVmwareOpcodeTypeToString(uint32_t value)
848{
849 VGPU10_VMWARE_OPCODE_TYPE enm = (VGPU10_VMWARE_OPCODE_TYPE)value;
850 switch (enm)
851 {
852 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_IDIV);
853 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DFRC);
854 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DRSQ);
855 SVGA_CASE_ID2STR(VGPU10_VMWARE_NUM_OPCODES);
856 }
857 return NULL;
858}
859
860#endif /* LOG_ENABLED */
861
/*
 * MD5 from IPRT (alt-md5.cpp) for DXBC hash calculation.
 * The DXBC hash function pads the data differently than standard MD5, see dxbcHash.
 * Therefore RTMd5Final is not needed. Two functions have been renamed to
 * dxbcRTMd5Update and dxbcRTMd5Init.
 */
867
868
/* The four core functions - F1 is optimized somewhat */
/* #define F1(x, y, z) (x & y | ~x & z) */
#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))


/* This is the central step in the MD5 algorithm. */
#define MD5STEP(f, w, x, y, z, data, s) \
    ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )


/**
 * The core of the MD5 algorithm: folds one 64-byte block into the hash state.
 *
 * The 64 steps are driven by tables instead of being written out one by one.
 * After each step the four working registers are rotated, so that the same
 * MD5STEP(f, a, b, c, d, ...) invocation plays the role of the
 * (a,b,c,d) / (d,a,b,c) / (c,d,a,b) / (b,c,d,a) sequence of the classic
 * unrolled formulation.
 *
 * @param   buf     The four 32-bit state words; updated in place.
 * @param   in      The 16 32-bit words of the block to add.
 */
static void rtMd5Transform(uint32_t buf[4], uint32_t const in[16])
{
    /* Additive constant of each step. */
    static uint32_t const s_au32Addend[64] =
    {
        /* Round 1 (F1) */
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
        /* Round 2 (F2) */
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
        /* Round 3 (F3) */
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
        /* Round 4 (F4) */
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
    };

    /* Left-rotate amounts: one row per round, cycled every four steps. */
    static uint8_t const s_acRotate[4][4] =
    {
        { 7, 12, 17, 22 },
        { 5,  9, 14, 20 },
        { 4, 11, 16, 23 },
        { 6, 10, 15, 21 }
    };

    uint32_t a = buf[0];
    uint32_t b = buf[1];
    uint32_t c = buf[2];
    uint32_t d = buf[3];

    for (unsigned iStep = 0; iStep < 64; ++iStep)
    {
        unsigned const iRound = iStep >> 4;
        unsigned const cShift = s_acRotate[iRound][iStep & 3];
        uint32_t const uAdd   = s_au32Addend[iStep];

        /* Each round uses its own mixing function and message word order. */
        switch (iRound)
        {
            case 0:
                MD5STEP(F1, a, b, c, d, in[iStep] + uAdd, cShift);
                break;
            case 1:
                MD5STEP(F2, a, b, c, d, in[(5 * iStep + 1) & 15] + uAdd, cShift);
                break;
            case 2:
                MD5STEP(F3, a, b, c, d, in[(3 * iStep + 5) & 15] + uAdd, cShift);
                break;
            default:
                MD5STEP(F4, a, b, c, d, in[(7 * iStep) & 15] + uAdd, cShift);
                break;
        }

        /* Rotate the registers: (a, b, c, d) <- (d, a, b, c). */
        uint32_t const uTmp = d;
        d = c;
        c = b;
        b = a;
        a = uTmp;
    }

    /* 64 is a multiple of 4, so the registers are back in their home positions. */
    buf[0] += a;
    buf[1] += b;
    buf[2] += c;
    buf[3] += d;
}
970
971
#ifdef RT_BIG_ENDIAN
/*
 * Converts 'longs' 32-bit words at 'buf' from little-endian to host byte
 * order, in place. At least one word is always processed.
 *
 * Note: this code is harmless on little-endian machines.
 */
static void rtMd5ByteReverse(uint32_t *buf, unsigned int longs)
{
    do
    {
        uint32_t const u32Host = RT_LE2H_U32(*buf);
        *buf++ = u32Host;
    } while (--longs);
}
#else /* little endian - do nothing */
# define rtMd5ByteReverse(buf, len) do { /* Nothing */ } while (0)
#endif
990
991
992/*
993 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
994 * initialization constants.
995 */
996static void dxbcRTMd5Init(PRTMD5CONTEXT pCtx)
997{
998 pCtx->AltPrivate.buf[0] = 0x67452301;
999 pCtx->AltPrivate.buf[1] = 0xefcdab89;
1000 pCtx->AltPrivate.buf[2] = 0x98badcfe;
1001 pCtx->AltPrivate.buf[3] = 0x10325476;
1002
1003 pCtx->AltPrivate.bits[0] = 0;
1004 pCtx->AltPrivate.bits[1] = 0;
1005}
1006
1007
1008/*
1009 * Update context to reflect the concatenation of another buffer full
1010 * of bytes.
1011 */
1012/** @todo Optimize this, because len is always a multiple of 64. */
1013static void dxbcRTMd5Update(PRTMD5CONTEXT pCtx, const void *pvBuf, size_t len)
1014{
1015 const uint8_t *buf = (const uint8_t *)pvBuf;
1016 uint32_t t;
1017
1018 /* Update bitcount */
1019 t = pCtx->AltPrivate.bits[0];
1020 if ((pCtx->AltPrivate.bits[0] = t + ((uint32_t) len << 3)) < t)
1021 pCtx->AltPrivate.bits[1]++; /* Carry from low to high */
1022 pCtx->AltPrivate.bits[1] += (uint32_t)(len >> 29);
1023
1024 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
1025
1026 /* Handle any leading odd-sized chunks */
1027 if (t)
1028 {
1029 uint8_t *p = (uint8_t *) pCtx->AltPrivate.in + t;
1030
1031 t = 64 - t;
1032 if (len < t)
1033 {
1034 memcpy(p, buf, len);
1035 return;
1036 }
1037 memcpy(p, buf, t);
1038 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1039 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1040 buf += t;
1041 len -= t;
1042 }
1043
1044 /* Process data in 64-byte chunks */
1045#ifndef RT_BIG_ENDIAN
1046 if (!((uintptr_t)buf & 0x3))
1047 {
1048 while (len >= 64) {
1049 rtMd5Transform(pCtx->AltPrivate.buf, (uint32_t const *)buf);
1050 buf += 64;
1051 len -= 64;
1052 }
1053 }
1054 else
1055#endif
1056 {
1057 while (len >= 64) {
1058 memcpy(pCtx->AltPrivate.in, buf, 64);
1059 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1060 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1061 buf += 64;
1062 len -= 64;
1063 }
1064 }
1065
1066 /* Handle any remaining bytes of data */
1067 memcpy(pCtx->AltPrivate.in, buf, len);
1068}
1069
1070
1071static void dxbcHash(void const *pvData, uint32_t cbData, uint8_t pabDigest[RTMD5HASHSIZE])
1072{
1073 size_t const kBlockSize = 64;
1074 uint8_t au8BlockBuffer[kBlockSize];
1075
1076 static uint8_t const s_au8Padding[kBlockSize] =
1077 {
1078 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1079 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1080 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1081 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1082 };
1083
1084 RTMD5CONTEXT Ctx;
1085 PRTMD5CONTEXT const pCtx = &Ctx;
1086 dxbcRTMd5Init(pCtx);
1087
1088 uint8_t const *pu8Data = (uint8_t *)pvData;
1089 size_t cbRemaining = cbData;
1090
1091 size_t const cbCompleteBlocks = cbData & ~ (kBlockSize - 1);
1092 dxbcRTMd5Update(pCtx, pu8Data, cbCompleteBlocks);
1093 pu8Data += cbCompleteBlocks;
1094 cbRemaining -= cbCompleteBlocks;
1095
1096 /* Custom padding. */
1097 if (cbRemaining >= kBlockSize - 2 * sizeof(uint32_t))
1098 {
1099 /* Two additional blocks. */
1100 memcpy(&au8BlockBuffer[0], pu8Data, cbRemaining);
1101 memcpy(&au8BlockBuffer[cbRemaining], s_au8Padding, kBlockSize - cbRemaining);
1102 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1103
1104 memset(&au8BlockBuffer[sizeof(uint32_t)], 0, kBlockSize - 2 * sizeof(uint32_t));
1105 }
1106 else
1107 {
1108 /* One additional block. */
1109 memcpy(&au8BlockBuffer[sizeof(uint32_t)], pu8Data, cbRemaining);
1110 memcpy(&au8BlockBuffer[sizeof(uint32_t) + cbRemaining], s_au8Padding, kBlockSize - cbRemaining - 2 * sizeof(uint32_t));
1111 }
1112
1113 /* Set the first and last dwords of the last block. */
1114 *(uint32_t *)&au8BlockBuffer[0] = cbData << 3;
1115 *(uint32_t *)&au8BlockBuffer[kBlockSize - sizeof(uint32_t)] = (cbData << 1) | 1;
1116 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1117
1118 AssertCompile(sizeof(pCtx->AltPrivate.buf) == RTMD5HASHSIZE);
1119 memcpy(pabDigest, pCtx->AltPrivate.buf, RTMD5HASHSIZE);
1120}
1121
1122
1123/*
1124 *
1125 * Shader token reader.
1126 *
1127 */
1128
/**
 * Read cursor over an array of 32-bit shader tokens.
 */
typedef struct DXBCTokenReader
{
    /** Next token to read. */
    uint32_t const *pToken;
    /** How many tokens total. */
    uint32_t        cToken;
    /** How many tokens remain. */
    uint32_t        cRemainingToken;
} DXBCTokenReader;
1135
1136
#ifdef LOG_ENABLED
/**
 * Returns the byte offset of the next token, i.e. 4 bytes for every token
 * consumed so far. Only used for logging.
 */
DECLINLINE(uint32_t) dxbcTokenReaderByteOffset(DXBCTokenReader *r)
{
    uint32_t const cTokensConsumed = r->cToken - r->cRemainingToken;
    return cTokensConsumed * 4;
}
#endif
1143
1144
#if 0 // Unused for now
/**
 * Returns the number of tokens which have not been read yet.
 */
DECLINLINE(uint32_t) dxbcTokenReaderRemaining(DXBCTokenReader *r)
{
    uint32_t const cTokensLeft = r->cRemainingToken;
    return cTokensLeft;
}
#endif
1151
1152
1153DECLINLINE(uint32_t const *) dxbcTokenReaderPtr(DXBCTokenReader *r)
1154{
1155 return r->pToken;
1156}
1157
1158
1159DECLINLINE(bool) dxbcTokenReaderCanRead(DXBCTokenReader *r, uint32_t cToken)
1160{
1161 return cToken <= r->cRemainingToken;
1162}
1163
1164
1165DECLINLINE(void) dxbcTokenReaderSkip(DXBCTokenReader *r, uint32_t cToken)
1166{
1167 AssertReturnVoid(r->cRemainingToken >= cToken);
1168 r->cRemainingToken -= cToken;
1169 r->pToken += cToken;
1170}
1171
1172
1173DECLINLINE(uint32_t) dxbcTokenReaderRead32(DXBCTokenReader *r)
1174{
1175 AssertReturn(r->cRemainingToken, 0);
1176 --r->cRemainingToken;
1177 return *(r->pToken++);
1178}
1179
1180
1181DECLINLINE(uint64_t) dxbcTokenReaderRead64(DXBCTokenReader *r)
1182{
1183 uint64_t const u64Low = dxbcTokenReaderRead32(r);
1184 uint64_t const u64High = dxbcTokenReaderRead32(r);
1185 return u64Low + (u64High << 32);
1186}
1187
1188
1189/*
1190 *
1191 * Byte writer.
1192 *
1193 */
1194
/**
 * Growable output buffer with a movable write position.
 */
typedef struct DXBCByteWriter
{
    /** First byte of the buffer. */
    uint8_t *pu8ByteCodeBegin;
    /** Next byte to be written. */
    uint8_t *pu8ByteCodePtr;
    /** How many bytes allocated in the buffer. */
    uint32_t cbAllocated;
    /** How many bytes remain in the buffer. */
    uint32_t cbRemaining;
    /** Offset of first never written byte.
     * Since the writer allows to jump in the buffer, this field tracks
     * the upper boundary of the written data. */
    uint32_t cbWritten;
    /** Status code of the writer. The writer functions check it with
     * RT_FAILURE and do nothing once it indicates a failure. */
    int32_t  rc;
} DXBCByteWriter;
1207
1208
/**
 * Saved write position of a DXBCByteWriter, see dxbcByteWriterSetOffset
 * and dxbcByteWriterRestore.
 */
typedef struct DXBCByteWriterState
{
    /** Offset of the next free byte. */
    uint32_t off;
} DXBCByteWriterState;
1213
1214
1215DECLINLINE(void *) dxbcByteWriterPtr(DXBCByteWriter *w)
1216{
1217 return w->pu8ByteCodePtr;
1218}
1219
1220
1221DECLINLINE(uint32_t) dxbcByteWriterSize(DXBCByteWriter *w)
1222{
1223 return (uint32_t)(w->pu8ByteCodePtr - w->pu8ByteCodeBegin);
1224}
1225
1226
1227static bool dxbcByteWriterRealloc(DXBCByteWriter *w, uint32_t cbNew)
1228{
1229 void *pvNew = RTMemAllocZ(cbNew);
1230 if (!pvNew)
1231 {
1232 w->rc = VERR_NO_MEMORY;
1233 return false;
1234 }
1235
1236 uint32_t const cbCurrent = dxbcByteWriterSize(w);
1237 memcpy(pvNew, w->pu8ByteCodeBegin, cbCurrent);
1238 RTMemFree(w->pu8ByteCodeBegin);
1239
1240 w->pu8ByteCodeBegin = (uint8_t *)pvNew;
1241 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + cbCurrent;
1242 w->cbAllocated = cbNew;
1243 w->cbRemaining = cbNew - cbCurrent;
1244 return true;
1245}
1246
1247
1248DECLINLINE(bool) dxbcByteWriterSetOffset(DXBCByteWriter *w, uint32_t off, DXBCByteWriterState *pSavedWriterState)
1249{
1250 if (RT_FAILURE(w->rc))
1251 return false;
1252
1253 uint32_t const cbNew = RT_ALIGN_32(off, 1024);
1254 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1255 AssertReturnStmt(off < cbMax && cbNew < cbMax, w->rc = VERR_INVALID_PARAMETER, false);
1256
1257 if (cbNew > w->cbAllocated)
1258 {
1259 if (!dxbcByteWriterRealloc(w, cbNew))
1260 return false;
1261 }
1262
1263 pSavedWriterState->off = dxbcByteWriterSize(w);
1264
1265 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + off;
1266 w->cbRemaining = w->cbAllocated - off;
1267 return true;
1268}
1269
1270
1271DECLINLINE(void) dxbcByteWriterRestore(DXBCByteWriter *w, DXBCByteWriterState *pSavedWriterState)
1272{
1273 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + pSavedWriterState->off;
1274 w->cbRemaining = w->cbAllocated - pSavedWriterState->off;
1275}
1276
1277
1278DECLINLINE(void) dxbcByteWriterCommit(DXBCByteWriter *w, uint32_t cbCommit)
1279{
1280 if (RT_FAILURE(w->rc))
1281 return;
1282
1283 Assert(cbCommit < w->cbRemaining);
1284 cbCommit = RT_MIN(cbCommit, w->cbRemaining);
1285 w->pu8ByteCodePtr += cbCommit;
1286 w->cbRemaining -= cbCommit;
1287 w->cbWritten = RT_MAX(w->cbWritten, w->cbAllocated - w->cbRemaining);
1288}
1289
1290
1291DECLINLINE(bool) dxbcByteWriterCanWrite(DXBCByteWriter *w, uint32_t cbMore)
1292{
1293 if (RT_FAILURE(w->rc))
1294 return false;
1295
1296 if (cbMore <= w->cbRemaining)
1297 return true;
1298
1299 /* Do not allow to allocate more than 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES */
1300 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1301 AssertReturnStmt(cbMore < cbMax && RT_ALIGN_32(cbMore, 4096) <= cbMax - w->cbAllocated, w->rc = VERR_INVALID_PARAMETER, false);
1302
1303 uint32_t cbNew = w->cbAllocated + RT_ALIGN_32(cbMore, 4096);
1304 return dxbcByteWriterRealloc(w, cbNew);
1305}
1306
1307
1308DECLINLINE(bool) dxbcByteWriterAddTokens(DXBCByteWriter *w, uint32_t const *paToken, uint32_t cToken)
1309{
1310 uint32_t const cbWrite = cToken * sizeof(uint32_t);
1311 if (dxbcByteWriterCanWrite(w, cbWrite))
1312 {
1313 memcpy(dxbcByteWriterPtr(w), paToken, cbWrite);
1314 dxbcByteWriterCommit(w, cbWrite);
1315 return true;
1316 }
1317
1318 AssertFailed();
1319 return false;
1320}
1321
1322
1323DECLINLINE(bool) dxbcByteWriterInit(DXBCByteWriter *w, uint32_t cbInitial)
1324{
1325 RT_ZERO(*w);
1326 return dxbcByteWriterCanWrite(w, cbInitial);
1327}
1328
1329
1330DECLINLINE(void) dxbcByteWriterReset(DXBCByteWriter *w)
1331{
1332 RTMemFree(w->pu8ByteCodeBegin);
1333 RT_ZERO(*w);
1334}
1335
1336
1337DECLINLINE(void) dxbcByteWriterFetchData(DXBCByteWriter *w, void **ppv, uint32_t *pcb)
1338{
1339 *ppv = w->pu8ByteCodeBegin;
1340 *pcb = w->cbWritten;
1341
1342 w->pu8ByteCodeBegin = NULL;
1343 dxbcByteWriterReset(w);
1344}
1345
1346
1347/*
1348 *
1349 * VGPU10 shader parser.
1350 *
1351 */
1352
1353/* Parse an instruction operand. */
1354static int dxbcParseOperand(DXBCTokenReader *r, VGPUOperand *paOperand, uint32_t *pcOperandRemain)
1355{
1356 ASSERT_GUEST_RETURN(*pcOperandRemain > 0, VERR_NOT_SUPPORTED);
1357
1358 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1359
1360 paOperand->paOperandToken = dxbcTokenReaderPtr(r);
1361 paOperand->cOperandToken = 0;
1362
1363 VGPU10OperandToken0 operand0;
1364 operand0.value = dxbcTokenReaderRead32(r);
1365
1366 Log6((" %s(%d) %s(%d) %s(%d) %s(%d)\n",
1367 dxbcOperandNumComponentsToString(operand0.numComponents), operand0.numComponents,
1368 dxbcOperandComponentModeToString(operand0.selectionMode), operand0.selectionMode,
1369 dxbcOperandTypeToString(operand0.operandType), operand0.operandType,
1370 dxbcOperandIndexDimensionToString(operand0.indexDimension), operand0.indexDimension));
1371
1372 ASSERT_GUEST_RETURN(operand0.numComponents <= VGPU10_OPERAND_4_COMPONENT, VERR_INVALID_PARAMETER);
1373 if ( operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32
1374 && operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE64)
1375 {
1376 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1377 {
1378 ASSERT_GUEST_RETURN(operand0.selectionMode <= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE, VERR_INVALID_PARAMETER);
1379 switch (operand0.selectionMode)
1380 {
1381 case VGPU10_OPERAND_4_COMPONENT_MASK_MODE:
1382 Log6((" Mask %#x\n", operand0.mask));
1383 break;
1384 case VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE:
1385 Log6((" Swizzle %s(%d) %s(%d) %s(%d) %s(%d)\n",
1386 dxbcOperandComponentNameToString(operand0.swizzleX), operand0.swizzleX,
1387 dxbcOperandComponentNameToString(operand0.swizzleY), operand0.swizzleY,
1388 dxbcOperandComponentNameToString(operand0.swizzleZ), operand0.swizzleZ,
1389 dxbcOperandComponentNameToString(operand0.swizzleW), operand0.swizzleW));
1390 break;
1391 case VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE:
1392 Log6((" Select %s(%d)\n",
1393 dxbcOperandComponentNameToString(operand0.selectMask), operand0.selectMask));
1394 break;
1395 default: /* Never happens. */
1396 break;
1397 }
1398 }
1399 }
1400
1401 if (operand0.extended)
1402 {
1403 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1404
1405 VGPU10OperandToken1 operand1;
1406 operand1.value = dxbcTokenReaderRead32(r);
1407 }
1408
1409 ASSERT_GUEST_RETURN(operand0.operandType < VGPU10_NUM_OPERANDS, VERR_INVALID_PARAMETER);
1410
1411 if ( operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32
1412 || operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE64)
1413 {
1414 uint32_t cComponent = 0;
1415 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1416 cComponent = 4;
1417 else if (operand0.numComponents == VGPU10_OPERAND_1_COMPONENT)
1418 cComponent = 1;
1419
1420 for (uint32_t i = 0; i < cComponent; ++i)
1421 {
1422 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1423 paOperand->aImm[i] = dxbcTokenReaderRead32(r);
1424 }
1425 }
1426
1427 paOperand->numComponents = operand0.numComponents;
1428 paOperand->selectionMode = operand0.selectionMode;
1429 paOperand->mask = operand0.mask;
1430 paOperand->operandType = operand0.operandType;
1431 paOperand->indexDimension = operand0.indexDimension;
1432
1433 int rc = VINF_SUCCESS;
1434 /* 'indexDimension' tells the number of indices. 'i' is the array index, i.e. i = 0 for 1D, etc. */
1435 for (uint32_t i = 0; i < operand0.indexDimension; ++i)
1436 {
1437 if (i == 0) /* VGPU10_OPERAND_INDEX_1D */
1438 paOperand->aOperandIndex[i].indexRepresentation = operand0.index0Representation;
1439 else if (i == 1) /* VGPU10_OPERAND_INDEX_2D */
1440 paOperand->aOperandIndex[i].indexRepresentation = operand0.index1Representation;
1441 else /* VGPU10_OPERAND_INDEX_3D */
1442 continue; /* Skip because it is "rarely if ever used" and is not supported by VGPU10. */
1443
1444 uint32_t const indexRepresentation = paOperand->aOperandIndex[i].indexRepresentation;
1445 switch (indexRepresentation)
1446 {
1447 case VGPU10_OPERAND_INDEX_IMMEDIATE32:
1448 {
1449 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1450 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1451 break;
1452 }
1453 case VGPU10_OPERAND_INDEX_IMMEDIATE64:
1454 {
1455 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1456 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1457 break;
1458 }
1459 case VGPU10_OPERAND_INDEX_RELATIVE:
1460 {
1461 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1462 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1463 Log6((" [operand index %d] parsing relative\n", i));
1464 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1465 break;
1466 }
1467 case VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1468 {
1469 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1470 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1471 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1472 Log6((" [operand index %d] parsing relative\n", i));
1473 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1474 break;
1475 }
1476 case VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE:
1477 {
1478 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1479 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1480 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1481 Log6((" [operand index %d] parsing relative\n", i));
1482 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1483 break;
1484 }
1485 default:
1486 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1487 }
1488 Log6((" [operand index %d] %s(%d): %#llx%s\n",
1489 i, dxbcOperandIndexRepresentationToString(indexRepresentation), indexRepresentation,
1490 paOperand->aOperandIndex[i].iOperandImmediate, paOperand->aOperandIndex[i].pOperandRelative ? " + relative" : ""));
1491 if (RT_FAILURE(rc))
1492 break;
1493 }
1494
1495 paOperand->cOperandToken = dxbcTokenReaderPtr(r) - paOperand->paOperandToken;
1496
1497 *pcOperandRemain -= 1;
1498 return VINF_SUCCESS;
1499}
1500
1501
1502/* Parse an instruction. */
1503static int dxbcParseOpcode(DXBCTokenReader *r, VGPUOpcode *pOpcode)
1504{
1505 RT_ZERO(*pOpcode);
1506 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1507
1508 pOpcode->paOpcodeToken = dxbcTokenReaderPtr(r);
1509
1510 VGPU10OpcodeToken0 opcode;
1511 opcode.value = dxbcTokenReaderRead32(r);
1512
1513 pOpcode->opcodeType = opcode.opcodeType;
1514 ASSERT_GUEST_RETURN(pOpcode->opcodeType < VGPU10_NUM_OPCODES, VERR_INVALID_PARAMETER);
1515
1516 Log6(("[%#x] %s length %d\n",
1517 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), opcode.instructionLength));
1518
1519 uint32_t const cOperand = g_aOpcodeInfo[pOpcode->opcodeType].cOperand;
1520 if (cOperand != UINT32_MAX)
1521 {
1522 ASSERT_GUEST_RETURN(cOperand < RT_ELEMENTS(pOpcode->aIdxOperand), VERR_INVALID_PARAMETER);
1523
1524 pOpcode->cOpcodeToken = opcode.instructionLength;
1525 uint32_t cOpcode = 1; /* Opcode token + extended opcode tokens. */
1526 if (opcode.extended)
1527 {
1528 if ( pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_BODY
1529 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_TABLE
1530 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_INTERFACE
1531 || pOpcode->opcodeType == VGPU10_OPCODE_INTERFACE_CALL
1532 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_THREAD_GROUP)
1533 {
1534 /* "next DWORD contains ... the actual instruction length in DWORD since it may not fit into 7 bits" */
1535 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1536 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1537 ++cOpcode;
1538 }
1539 else
1540 {
1541 VGPU10OpcodeToken1 opcode1;
1542 do
1543 {
1544 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1545 opcode1.value = dxbcTokenReaderRead32(r);
1546 ++cOpcode;
1547 ASSERT_GUEST( opcode1.opcodeType == VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS
1548 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM
1549 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE);
1550 } while(opcode1.extended);
1551 }
1552 }
1553
1554 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken >= 1 && pOpcode->cOpcodeToken < 256, VERR_INVALID_PARAMETER);
1555 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - cOpcode), VERR_INVALID_PARAMETER);
1556
1557#ifdef LOG_ENABLED
1558 Log6((" %08X", opcode.value));
1559 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1560 Log6((" %08X", r->pToken[i - 1]));
1561 Log6(("\n"));
1562
1563 if (pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1564 Log6((" %s\n",
1565 dxbcResourceDimensionToString(opcode.resourceDimension)));
1566 else
1567 Log6((" %s\n",
1568 dxbcInterpolationModeToString(opcode.interpolationMode)));
1569#endif
1570 /* Additional tokens before operands. */
1571 switch (pOpcode->opcodeType)
1572 {
1573 case VGPU10_OPCODE_INTERFACE_CALL:
1574 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1575 dxbcTokenReaderSkip(r, 1); /* Function index */
1576 break;
1577
1578 default:
1579 break;
1580 }
1581
1582 /* Operands. */
1583 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1584 for (uint32_t i = 0; i < cOperand; ++i)
1585 {
1586 Log6((" [operand %d]\n", i));
1587 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1588 pOpcode->aIdxOperand[i] = idxOperand;
1589 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1590 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1591 }
1592
1593 pOpcode->cOperand = cOperand;
1594
1595 /* Additional tokens after operands. */
1596 switch (pOpcode->opcodeType)
1597 {
1598 case VGPU10_OPCODE_DCL_INPUT_SIV:
1599 case VGPU10_OPCODE_DCL_INPUT_SGV:
1600 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
1601 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
1602 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1603 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1604 {
1605 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1606
1607 VGPU10NameToken name;
1608 name.value = dxbcTokenReaderRead32(r);
1609 Log6((" %s(%d)\n",
1610 dxbcSystemNameToString(name.name), name.name));
1611 pOpcode->semanticName = name.name;
1612 break;
1613 }
1614 case VGPU10_OPCODE_DCL_RESOURCE:
1615 {
1616 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1617 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1618 break;
1619 }
1620 case VGPU10_OPCODE_DCL_TEMPS:
1621 {
1622 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1623 dxbcTokenReaderSkip(r, 1); /* number of temps */
1624 break;
1625 }
1626 case VGPU10_OPCODE_DCL_INDEXABLE_TEMP:
1627 {
1628 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1629 dxbcTokenReaderSkip(r, 3); /* register index; number of registers; number of components */
1630 break;
1631 }
1632 case VGPU10_OPCODE_DCL_INDEX_RANGE:
1633 {
1634 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1635 dxbcTokenReaderSkip(r, 1); /* count of registers */
1636 break;
1637 }
1638 case VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1639 {
1640 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1641 dxbcTokenReaderSkip(r, 1); /* maximum number of primitives */
1642 break;
1643 }
1644 case VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT:
1645 {
1646 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1647 dxbcTokenReaderSkip(r, 1); /* number of instances */
1648 break;
1649 }
1650 case VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR:
1651 {
1652 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1653 dxbcTokenReaderSkip(r, 1); /* maximum TessFactor */
1654 break;
1655 }
1656 case VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1657 case VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1658 {
1659 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1660 dxbcTokenReaderSkip(r, 1); /* number of instances of the current fork/join phase program to execute */
1661 break;
1662 }
1663 case VGPU10_OPCODE_DCL_THREAD_GROUP:
1664 {
1665 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1666 dxbcTokenReaderSkip(r, 3); /* Thread Group dimensions as UINT32: x, y, z */
1667 break;
1668 }
1669 case VGPU10_OPCODE_DCL_UAV_TYPED:
1670 {
1671 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1672 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1673 break;
1674 }
1675 case VGPU10_OPCODE_DCL_UAV_STRUCTURED:
1676 {
1677 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1678 dxbcTokenReaderSkip(r, 1); /* byte stride */
1679 break;
1680 }
1681 case VGPU10_OPCODE_DCL_TGSM_RAW:
1682 {
1683 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1684 dxbcTokenReaderSkip(r, 1); /* element count */
1685 break;
1686 }
1687 case VGPU10_OPCODE_DCL_TGSM_STRUCTURED:
1688 {
1689 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1690 dxbcTokenReaderSkip(r, 2); /* struct byte stride; struct count */
1691 break;
1692 }
1693 case VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED:
1694 {
1695 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1696 dxbcTokenReaderSkip(r, 1); /* struct byte stride */
1697 break;
1698 }
1699 default:
1700 break;
1701 }
1702 }
1703 else
1704 {
1705 /* Special opcodes. */
1706 if (pOpcode->opcodeType == VGPU10_OPCODE_CUSTOMDATA)
1707 {
1708 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1709 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1710
1711 if (pOpcode->cOpcodeToken < 2)
1712 pOpcode->cOpcodeToken = 2;
1713 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 2), VERR_INVALID_PARAMETER);
1714
1715#ifdef LOG_ENABLED
1716 Log6((" %08X", opcode.value));
1717 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1718 Log6((" %08X", r->pToken[i - 1]));
1719 Log6(("\n"));
1720
1721 Log6((" %s\n",
1722 dxbcCustomDataClassToString(opcode.customDataClass)));
1723#endif
1724 dxbcTokenReaderSkip(r, pOpcode->cOpcodeToken - 2);
1725 }
1726 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1727 {
1728 pOpcode->cOpcodeToken = opcode.instructionLength;
1729 pOpcode->opcodeSubtype = opcode.vmwareOpcodeType;
1730
1731#ifdef LOG_ENABLED
1732 Log6((" %08X", opcode.value));
1733 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1734 Log6((" %08X", r->pToken[i - 1]));
1735 Log6(("\n"));
1736
1737 Log6((" %s(%d)\n",
1738 dxbcVmwareOpcodeTypeToString(opcode.vmwareOpcodeType), opcode.vmwareOpcodeType));
1739#endif
1740
1741 if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_IDIV)
1742 {
1743 /* Integer divide. */
1744 pOpcode->cOperand = 4; /* dstQuit, dstRem, src0, src1. */
1745
1746 /* Operands. */
1747 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1748 for (uint32_t i = 0; i < pOpcode->cOperand; ++i)
1749 {
1750 Log6((" [operand %d]\n", i));
1751 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1752 pOpcode->aIdxOperand[i] = idxOperand;
1753 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1754 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1755 }
1756 }
1757 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DFRC)
1758 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DRSQ)
1759 else
1760 {
1761 /** @todo implement */
1762 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1763 }
1764 }
1765 else
1766 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1767
1768 // pOpcode->cOperand = 0;
1769 }
1770
1771 return VINF_SUCCESS;
1772}
1773
1774
1775typedef struct DXBCOUTPUTCTX
1776{
1777 VGPU10ProgramToken programToken;
1778 uint32_t cToken; /* Number of tokens in the original shader code. */
1779
1780 uint32_t offSubroutine; /* Current offset where to write subroutines. */
1781} DXBCOUTPUTCTX;
1782
1783
1784static void dxbcOutputInit(DXBCOUTPUTCTX *pOutctx, VGPU10ProgramToken const *pProgramToken, uint32_t cToken)
1785{
1786 RT_ZERO(*pOutctx);
1787 pOutctx->programToken = *pProgramToken;
1788 pOutctx->cToken = cToken;
1789
1790 pOutctx->offSubroutine = cToken * 4;
1791}
1792
1793
1794static int dxbcEmitVmwareIDIV(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1795{
1796 /* Insert a call and append a subroutne. */
1797 VGPU10OpcodeToken0 opcode;
1798 VGPU10OperandToken0 operand;
1799
1800 uint32_t const label = (pOutctx->offSubroutine - dxbcByteWriterSize(w)) / 4;
1801
1802 /*
1803 * Call
1804 */
1805 opcode.value = 0;
1806 opcode.opcodeType = VGPU10_OPCODE_CALL;
1807 opcode.instructionLength = 3;
1808 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1809
1810 operand.value = 0;
1811 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1812 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1813 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1814 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1815 dxbcByteWriterAddTokens(w, &operand.value, 1);
1816
1817 dxbcByteWriterAddTokens(w, &label, 1);
1818
1819 opcode.value = 0;
1820 opcode.opcodeType = VGPU10_OPCODE_NOP;
1821 opcode.instructionLength = 1;
1822 for (unsigned i = 0; i < pOpcode->cOpcodeToken - 3; ++i)
1823 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1824
1825 /*
1826 * Subroutine.
1827 */
1828 DXBCByteWriterState savedWriterState;
1829 if (!dxbcByteWriterSetOffset(w, pOutctx->offSubroutine, &savedWriterState))
1830 return w->rc;
1831
1832 /* label */
1833 opcode.value = 0;
1834 opcode.opcodeType = VGPU10_OPCODE_LABEL;
1835 opcode.instructionLength = 3;
1836 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1837
1838 operand.value = 0;
1839 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1840 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1841 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1842 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1843 dxbcByteWriterAddTokens(w, &operand.value, 1);
1844 dxbcByteWriterAddTokens(w, &label, 1);
1845
1846 /* Just output UDIV for now. */
1847 opcode.value = 0;
1848 opcode.opcodeType = VGPU10_OPCODE_UDIV;
1849 opcode.instructionLength = pOpcode->cOpcodeToken;
1850 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1851 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], pOpcode->cOpcodeToken - 1);
1852
1853 /* ret */
1854 opcode.value = 0;
1855 opcode.opcodeType = VGPU10_OPCODE_RET;
1856 opcode.instructionLength = 1;
1857 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1858
1859 pOutctx->offSubroutine = dxbcByteWriterSize(w);
1860 dxbcByteWriterRestore(w, &savedWriterState);
1861
1862 return w->rc;
1863}
1864
1865
1866static int dxbcOutputOpcode(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1867{
1868#ifdef DEBUG
1869 void *pvBegin = dxbcByteWriterPtr(w);
1870#endif
1871
1872 if ( pOutctx->programToken.programType == VGPU10_PIXEL_SHADER
1873 && pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1874 {
1875 /** @todo This is a workaround. */
1876 /* Sometimes the guest (Mesa) created a shader with uninitialized resource dimension.
1877 * Use texture 2d because it is what a pixel shader normally uses.
1878 */
1879 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken == 4, VERR_INVALID_PARAMETER);
1880
1881 VGPU10OpcodeToken0 opcode;
1882 opcode.value = pOpcode->paOpcodeToken[0];
1883 if (opcode.resourceDimension == VGPU10_RESOURCE_DIMENSION_BUFFER)
1884 {
1885 opcode.resourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
1886 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1887 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], 2);
1888 uint32_t const returnType = 0x5555; /* float */
1889 dxbcByteWriterAddTokens(w, &returnType, 1);
1890 return VINF_SUCCESS;
1891 }
1892 }
1893 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1894 {
1895 if (pOpcode->opcodeSubtype == VGPU10_VMWARE_OPCODE_IDIV)
1896 {
1897 return dxbcEmitVmwareIDIV(pOutctx, w, pOpcode);
1898 }
1899
1900 ASSERT_GUEST_FAILED_RETURN(VERR_NOT_SUPPORTED);
1901 }
1902
1903#ifdef DEBUG
1904 /* The code above must emit either nothing or everything. */
1905 Assert((uintptr_t)pvBegin == (uintptr_t)dxbcByteWriterPtr(w));
1906#endif
1907
1908 /* Just emit the unmodified instruction. */
1909 dxbcByteWriterAddTokens(w, pOpcode->paOpcodeToken, pOpcode->cOpcodeToken);
1910 return VINF_SUCCESS;
1911}
1912
1913
1914static int dxbcOutputFinalize(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w)
1915{
1916 RT_NOREF(pOutctx, w);
1917 return VINF_SUCCESS;
1918}
1919
1920
1921static DECLCALLBACK(int) signatureEntryCmp(void const *pvElement1, void const *pvElement2, void *pvUser)
1922{
1923 SVGA3dDXSignatureEntry const *e1 = (SVGA3dDXSignatureEntry *)pvElement1;
1924 SVGA3dDXSignatureEntry const *e2 = (SVGA3dDXSignatureEntry *)pvElement2;
1925 RT_NOREF(pvUser);
1926
1927 if (e1->registerIndex < e2->registerIndex)
1928 return -1;
1929 if (e1->registerIndex > e2->registerIndex)
1930 return 1;
1931 if ((e1->mask & 0xf) < (e2->mask & 0xf))
1932 return -1;
1933 if ((e1->mask & 0xf) > (e2->mask & 0xf))
1934 return 1;
1935 return 0;
1936}
1937
1938
1939static void dxbcGenerateSemantics(DXShaderInfo *pInfo, uint32_t cSignature,
1940 SVGA3dDXSignatureEntry const *paSignature,
1941 DXShaderAttributeSemantic *paSemantic,
1942 uint32_t u32BlobType);
1943
1944
1945/*
1946 * Parse and verify the shader byte code. Extract input and output signatures into pInfo.
1947 */
1948int DXShaderParse(void const *pvShaderCode, uint32_t cbShaderCode, DXShaderInfo *pInfo)
1949{
1950 if (pInfo)
1951 RT_ZERO(*pInfo);
1952
1953 ASSERT_GUEST_RETURN(cbShaderCode <= SVGA3D_MAX_SHADER_MEMORY_BYTES, VERR_INVALID_PARAMETER);
1954 ASSERT_GUEST_RETURN((cbShaderCode & 0x3) == 0, VERR_INVALID_PARAMETER); /* Aligned to the token size. */
1955 ASSERT_GUEST_RETURN(cbShaderCode >= 8, VERR_INVALID_PARAMETER); /* At least program and length tokens. */
1956
1957 uint32_t const *paToken = (uint32_t *)pvShaderCode;
1958
1959 VGPU10ProgramToken const *pProgramToken = (VGPU10ProgramToken *)&paToken[0];
1960 ASSERT_GUEST_RETURN( pProgramToken->majorVersion >= 4
1961 && pProgramToken->programType <= VGPU10_COMPUTE_SHADER, VERR_INVALID_PARAMETER);
1962 if (pInfo)
1963 pInfo->enmProgramType = (VGPU10_PROGRAM_TYPE)pProgramToken->programType;
1964
1965 uint32_t const cToken = paToken[1];
1966 Log6(("Shader version %d.%d type %s(%d) Length %d\n",
1967 pProgramToken->majorVersion, pProgramToken->minorVersion, dxbcShaderTypeToString(pProgramToken->programType), pProgramToken->programType, cToken));
1968 ASSERT_GUEST_RETURN(cbShaderCode / 4 >= cToken, VERR_INVALID_PARAMETER); /* Declared length should be less or equal to the actual. */
1969
1970 /* Write the parsed (and possibly modified) shader to a memory buffer. */
1971 DXBCByteWriter dxbcByteWriter;
1972 DXBCByteWriter *w = &dxbcByteWriter;
1973 if (!dxbcByteWriterInit(w, 4096 + cbShaderCode))
1974 return VERR_NO_MEMORY;
1975
1976 dxbcByteWriterAddTokens(w, paToken, 2);
1977
1978 DXBCTokenReader parser;
1979 RT_ZERO(parser);
1980
1981 DXBCTokenReader *r = &parser;
1982 r->pToken = &paToken[2];
1983 r->cToken = r->cRemainingToken = cToken - 2;
1984
1985 DXBCOUTPUTCTX outctx;
1986 dxbcOutputInit(&outctx, pProgramToken, cToken);
1987
1988 int rc = VINF_SUCCESS;
1989 while (dxbcTokenReaderCanRead(r, 1))
1990 {
1991 uint32_t const offOpcode = dxbcByteWriterSize(w);
1992
1993 VGPUOpcode opcode;
1994 rc = dxbcParseOpcode(r, &opcode);
1995 ASSERT_GUEST_STMT_BREAK(RT_SUCCESS(rc), rc = VERR_INVALID_PARAMETER);
1996
1997 rc = dxbcOutputOpcode(&outctx, w, &opcode);
1998 AssertRCBreak(rc);
1999
2000 if (pInfo)
2001 {
2002 /* Remember offsets of DCL_RESOURCE instructions. */
2003 if ( outctx.programToken.programType == VGPU10_PIXEL_SHADER
2004 && opcode.opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
2005 {
2006 if ( opcode.cOperand == 1
2007 && opcode.aValOperand[0].indexDimension == VGPU10_OPERAND_INDEX_1D
2008 && opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32)
2009 {
2010 uint32_t const indexResource = opcode.aValOperand[0].aOperandIndex[0].iOperandImmediate;
2011 if (indexResource < SVGA3D_DX_MAX_SRVIEWS)
2012 {
2013 ASSERT_GUEST(pInfo->aOffDclResource[indexResource] == 0);
2014 pInfo->aOffDclResource[indexResource] = offOpcode;
2015 pInfo->cDclResource = RT_MAX(pInfo->cDclResource, indexResource + 1);
2016 }
2017 else
2018 ASSERT_GUEST_FAILED();
2019 }
2020 else
2021 ASSERT_GUEST_FAILED();
2022 }
2023
2024 /* Fetch signatures. */
2025 SVGA3dDXSignatureEntry *pSignatureEntry = NULL;
2026 switch (opcode.opcodeType)
2027 {
2028 case VGPU10_OPCODE_DCL_INPUT:
2029 case VGPU10_OPCODE_DCL_INPUT_SIV:
2030 //case VGPU10_OPCODE_DCL_INPUT_SGV:
2031 case VGPU10_OPCODE_DCL_INPUT_PS:
2032 //case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
2033 //case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
2034 //case VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
2035 ASSERT_GUEST_STMT_BREAK(pInfo->cInputSignature < RT_ELEMENTS(pInfo->aInputSignature), rc = VERR_INVALID_PARAMETER);
2036 pSignatureEntry = &pInfo->aInputSignature[pInfo->cInputSignature++];
2037 break;
2038 case VGPU10_OPCODE_DCL_OUTPUT:
2039 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
2040 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
2041 //case VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
2042 ASSERT_GUEST_STMT_BREAK(pInfo->cOutputSignature < RT_ELEMENTS(pInfo->aOutputSignature), rc = VERR_INVALID_PARAMETER);
2043 pSignatureEntry = &pInfo->aOutputSignature[pInfo->cOutputSignature++];
2044 break;
2045 default:
2046 break;
2047 }
2048
2049 if (RT_FAILURE(rc))
2050 break;
2051
2052 if (pSignatureEntry)
2053 {
2054 ASSERT_GUEST_STMT_BREAK( opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32
2055 || opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE64,
2056 rc = VERR_NOT_SUPPORTED);
2057
2058 uint32_t const indexDimension = opcode.aValOperand[0].indexDimension;
2059 if (indexDimension == VGPU10_OPERAND_INDEX_0D)
2060 {
2061 if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID)
2062 {
2063 pSignatureEntry->registerIndex = 0;
2064 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID;
2065 }
2066 else if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH)
2067 {
2068 /* oDepth is always last in the signature. Register index is equal to 0xFFFFFFFF. */
2069 pSignatureEntry->registerIndex = 0xFFFFFFFF;
2070 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
2071 }
2072 else if (opcode.aValOperand[0].operandType <= VGPU10_OPERAND_TYPE_SM50_MAX)
2073 {
2074 pSignatureEntry->registerIndex = 0;
2075 pSignatureEntry->semanticName = opcode.semanticName;
2076 }
2077 else
2078 ASSERT_GUEST_FAILED_STMT_BREAK(rc = VERR_NOT_SUPPORTED);
2079 }
2080 else
2081 {
2082 ASSERT_GUEST_STMT_BREAK( indexDimension == VGPU10_OPERAND_INDEX_1D
2083 || indexDimension == VGPU10_OPERAND_INDEX_2D
2084 || indexDimension == VGPU10_OPERAND_INDEX_3D,
2085 rc = VERR_NOT_SUPPORTED);
2086 /* The register index seems to be in the highest dimension. */
2087 pSignatureEntry->registerIndex = opcode.aValOperand[0].aOperandIndex[indexDimension - VGPU10_OPERAND_INDEX_1D].iOperandImmediate;
2088 pSignatureEntry->semanticName = opcode.semanticName;
2089 }
2090 pSignatureEntry->mask = opcode.aValOperand[0].mask;
2091 pSignatureEntry->componentType = SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN; /// @todo Proper value? Seems that it is not important.
2092 pSignatureEntry->minPrecision = SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT;
2093 }
2094 }
2095 }
2096
2097 if (RT_FAILURE(rc))
2098 {
2099 return rc;
2100 }
2101
2102 rc = dxbcOutputFinalize(&outctx, w);
2103 if (RT_FAILURE(rc))
2104 {
2105 return rc;
2106 }
2107
2108 dxbcByteWriterFetchData(w, &pInfo->pvBytecode, &pInfo->cbBytecode);
2109 uint32_t *pcOutputToken = (uint32_t *)pInfo->pvBytecode + 1;
2110 *pcOutputToken = pInfo->cbBytecode / 4;
2111
2112 /* Sort signatures by register index and mask because the host API need them to be sorted. */
2113 if (pInfo->cInputSignature)
2114 {
2115 RTSortShell(pInfo->aInputSignature, pInfo->cInputSignature, sizeof(pInfo->aInputSignature[0]),
2116 signatureEntryCmp, NULL);
2117 dxbcGenerateSemantics(pInfo, pInfo->cInputSignature,
2118 pInfo->aInputSignature,
2119 pInfo->aInputSemantic, DXBC_BLOB_TYPE_ISGN);
2120 }
2121 if (pInfo->cOutputSignature)
2122 {
2123 RTSortShell(pInfo->aOutputSignature, pInfo->cOutputSignature, sizeof(pInfo->aOutputSignature[0]),
2124 signatureEntryCmp, NULL);
2125 dxbcGenerateSemantics(pInfo, pInfo->cOutputSignature,
2126 pInfo->aOutputSignature,
2127 pInfo->aOutputSemantic, DXBC_BLOB_TYPE_OSGN);
2128 }
2129 if (pInfo->cPatchConstantSignature)
2130 {
2131 RTSortShell(pInfo->aPatchConstantSignature, pInfo->cPatchConstantSignature, sizeof(pInfo->aPatchConstantSignature[0]),
2132 signatureEntryCmp, NULL);
2133 dxbcGenerateSemantics(pInfo, pInfo->cPatchConstantSignature,
2134 pInfo->aPatchConstantSignature,
2135 pInfo->aPatchConstantSemantic, DXBC_BLOB_TYPE_PCSG);
2136 }
2137
2138#ifdef LOG_ENABLED
2139 if (pInfo->cInputSignature)
2140 {
2141 Log6(("Input signatures:\n"));
2142 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2143 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2144 i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask,
2145 pInfo->aInputSemantic[i].pcszSemanticName, pInfo->aInputSemantic[i].SemanticIndex));
2146 }
2147 if (pInfo->cOutputSignature)
2148 {
2149 Log6(("Output signatures:\n"));
2150 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2151 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2152 i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask,
2153 pInfo->aOutputSemantic[i].pcszSemanticName, pInfo->aOutputSemantic[i].SemanticIndex));
2154 }
2155 if (pInfo->cPatchConstantSignature)
2156 {
2157 Log6(("Patch constant signatures:\n"));
2158 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2159 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2160 i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask,
2161 pInfo->aPatchConstantSemantic[i].pcszSemanticName, pInfo->aPatchConstantSemantic[i].SemanticIndex));
2162 }
2163#endif
2164
2165 return VINF_SUCCESS;
2166}
2167
2168void DXShaderGenerateSemantics(DXShaderInfo *pInfo)
2169{
2170 if (pInfo->cInputSignature)
2171 dxbcGenerateSemantics(pInfo, pInfo->cInputSignature,
2172 pInfo->aInputSignature,
2173 pInfo->aInputSemantic, DXBC_BLOB_TYPE_ISGN);
2174 if (pInfo->cOutputSignature)
2175 dxbcGenerateSemantics(pInfo, pInfo->cOutputSignature,
2176 pInfo->aOutputSignature,
2177 pInfo->aOutputSemantic, DXBC_BLOB_TYPE_OSGN);
2178 if (pInfo->cPatchConstantSignature)
2179 dxbcGenerateSemantics(pInfo, pInfo->cPatchConstantSignature,
2180 pInfo->aPatchConstantSignature,
2181 pInfo->aPatchConstantSemantic, DXBC_BLOB_TYPE_PCSG);
2182}
2183
2184void DXShaderFree(DXShaderInfo *pInfo)
2185{
2186 RTMemFree(pInfo->pvBytecode);
2187 RT_ZERO(*pInfo);
2188}
2189
2190
#if 0 // Unused. Replaced with dxbcSemanticInfo.
/*
 * Returns the HLSL system value semantic name for a signature semantic.
 * Everything which is not a known system value gets the generic "ATTRIB" name.
 */
static char const *dxbcSemanticName(SVGA3dDXSignatureSemanticName enmSemanticName)
{
    /* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics */
    char const *pszName;
    switch (enmSemanticName)
    {
        case SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION:
            pszName = "SV_Position"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE:
            pszName = "SV_ClipDistance"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE:
            pszName = "SV_CullDistance"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX:
            pszName = "SV_RenderTargetArrayIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX:
            pszName = "SV_ViewportArrayIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID:
            pszName = "SV_VertexID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID:
            pszName = "SV_PrimitiveID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID:
            pszName = "SV_InstanceID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE:
            pszName = "SV_IsFrontFace"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX:
            pszName = "SV_SampleIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
            pszName = "SV_FinalQuadUeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
            pszName = "SV_FinalQuadVeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
            pszName = "SV_FinalQuadUeq1EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
            pszName = "SV_FinalQuadVeq1EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:
            pszName = "SV_FinalQuadUInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:
            pszName = "SV_FinalQuadVInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
            pszName = "SV_FinalTriUeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
            pszName = "SV_FinalTriVeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
            pszName = "SV_FinalTriWeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
            pszName = "SV_FinalTriInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
            pszName = "SV_FinalLineDetailTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
            pszName = "SV_FinalLineDensityTessFactor"; break;
        default:
            Assert(enmSemanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
            /* Generic. Arbitrary name. It does not have any meaning. */
            pszName = "ATTRIB";
            break;
    }
    return pszName;
}
#endif
2227
2228
/*
 * Description of a signature semantic.
 * See https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics
 *
 * Values of u32Type:
 *   0 - undefined
 *   1 - unsigned int
 *   2 - signed int
 *   3 - float
 */
typedef struct VGPUSemanticInfo
{
    char const *pszName; /* The semantic name string. */
    uint32_t u32Type;    /* The type of the value, see the list above. */
} VGPUSemanticInfo;
2241
2242static VGPUSemanticInfo const g_aSemanticInfo[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX] =
2243{
2244 { "ATTRIB", 0 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2245 { "SV_Position", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1
2246 { "SV_ClipDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2
2247 { "SV_CullDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3
2248 { "SV_RenderTargetArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4
2249 { "SV_ViewportArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5
2250 { "SV_VertexID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6
2251 { "SV_PrimitiveID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7
2252 { "SV_InstanceID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8
2253 { "SV_IsFrontFace", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9
2254 { "SV_SampleIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10
2255 /** @todo Is this a correct name for all TessFactors? */
2256 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11
2257 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12
2258 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13
2259 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14
2260 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15
2261 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16
2262 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17
2263 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18
2264 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19
2265 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20
2266 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21
2267 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22
2268};
2269
2270static VGPUSemanticInfo const g_SemanticPSOutput =
2271 { "SV_TARGET", 3 }; // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2272
2273
2274static VGPUSemanticInfo const *dxbcSemanticInfo(DXShaderInfo const *pInfo, SVGA3dDXSignatureSemanticName enmSemanticName, uint32_t u32BlobType)
2275{
2276 if (enmSemanticName < RT_ELEMENTS(g_aSemanticInfo))
2277 {
2278 if ( enmSemanticName == 0
2279 && pInfo->enmProgramType == VGPU10_PIXEL_SHADER
2280 && u32BlobType == DXBC_BLOB_TYPE_OSGN)
2281 return &g_SemanticPSOutput;
2282 return &g_aSemanticInfo[enmSemanticName];
2283 }
2284 return &g_aSemanticInfo[0];
2285}
2286
2287
2288static void dxbcGenerateSemantics(DXShaderInfo *pInfo, uint32_t cSignature,
2289 SVGA3dDXSignatureEntry const *paSignature,
2290 DXShaderAttributeSemantic *paSemantic,
2291 uint32_t u32BlobType)
2292{
2293 for (uint32_t iSignatureEntry = 0; iSignatureEntry < cSignature; ++iSignatureEntry)
2294 {
2295 SVGA3dDXSignatureEntry const *src = &paSignature[iSignatureEntry];
2296 DXShaderAttributeSemantic *dst = &paSemantic[iSignatureEntry];
2297
2298 ASSERT_GUEST_RETURN_VOID(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX);
2299
2300 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2301 dst->pcszSemanticName = pSemanticInfo->pszName;
2302 dst->SemanticIndex = 0;
2303 for (uint32_t i = 0; i < iSignatureEntry; ++i)
2304 {
2305 DXShaderAttributeSemantic const *pSemantic = &paSemantic[i];
2306 if (RTStrCmp(pSemantic->pcszSemanticName, dst->pcszSemanticName) == 0)
2307 ++dst->SemanticIndex;
2308 }
2309 }
2310}
2311
2312
2313static int dxbcCreateIOSGNBlob(DXShaderInfo const *pInfo, DXBCHeader *pHdr, uint32_t u32BlobType, uint32_t cSignature,
2314 SVGA3dDXSignatureEntry const *paSignature, DXShaderAttributeSemantic const *paSemantic, DXBCByteWriter *w)
2315{
2316 RT_NOREF(pInfo);
2317 AssertReturn(cSignature <= SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS, VERR_INVALID_PARAMETER);
2318
2319 uint32_t cbBlob = RT_UOFFSETOF_DYN(DXBCBlobIOSGN, aElement[cSignature]);
2320 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2321 return VERR_NO_MEMORY;
2322
2323 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2324 pHdrBlob->u32BlobType = u32BlobType;
2325 // pHdrBlob->cbBlob = 0;
2326
2327 DXBCBlobIOSGN *pHdrISGN = (DXBCBlobIOSGN *)&pHdrBlob[1];
2328 pHdrISGN->cElement = cSignature;
2329 pHdrISGN->offElement = RT_UOFFSETOF(DXBCBlobIOSGN, aElement[0]);
2330
2331 for (uint32_t iSignatureEntry = 0; iSignatureEntry < cSignature; ++iSignatureEntry)
2332 {
2333 SVGA3dDXSignatureEntry const *srcEntry = &paSignature[iSignatureEntry];
2334 DXShaderAttributeSemantic const *srcSemantic = &paSemantic[iSignatureEntry];
2335 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iSignatureEntry];
2336
2337 dst->offElementName = 0;
2338 for (uint32_t i = 0; i < iSignatureEntry; ++i)
2339 {
2340 DXBCBlobIOSGNElement const *pElement = &pHdrISGN->aElement[i];
2341 char const *pszElementName = (char *)pHdrISGN + pElement->offElementName;
2342 if (RTStrCmp(pszElementName, srcSemantic->pcszSemanticName) == 0)
2343 {
2344 dst->offElementName = pElement->offElementName;
2345 break;
2346 }
2347 }
2348 dst->idxSemantic = srcSemantic->SemanticIndex;
2349 dst->enmSystemValue = srcEntry->semanticName;
2350 dst->enmComponentType = srcEntry->componentType;
2351 dst->idxRegister = srcEntry->registerIndex;
2352 dst->u.mask = srcEntry->mask;
2353
2354 if (dst->offElementName == 0)
2355 {
2356 /* Store the semantic name for this element. */
2357 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without DXBCBlobHeader). */
2358 uint32_t const cbElementName = (uint32_t)strlen(srcSemantic->pcszSemanticName) + 1;
2359 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob + cbElementName))
2360 return VERR_NO_MEMORY;
2361
2362 memcpy((char *)pHdrISGN + dst->offElementName, srcSemantic->pcszSemanticName, cbElementName);
2363 cbBlob += cbElementName;
2364 }
2365 }
2366
2367 /* Blobs are 4 bytes aligned. Commit the blob data. */
2368 cbBlob = RT_ALIGN_32(cbBlob, 4);
2369 pHdrBlob->cbBlob = cbBlob;
2370 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2371 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2372 return VINF_SUCCESS;
2373}
2374
2375
2376static int dxbcCreateSHDRBlob(DXBCHeader *pHdr, uint32_t u32BlobType,
2377 void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2378{
2379 uint32_t cbBlob = cbShader;
2380 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2381 return VERR_NO_MEMORY;
2382
2383 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2384 pHdrBlob->u32BlobType = u32BlobType;
2385 // pHdrBlob->cbBlob = 0;
2386
2387 memcpy(&pHdrBlob[1], pvShader, cbShader);
2388
2389 /* Blobs are 4 bytes aligned. Commit the blob data. */
2390 cbBlob = RT_ALIGN_32(cbBlob, 4);
2391 pHdrBlob->cbBlob = cbBlob;
2392 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2393 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2394 return VINF_SUCCESS;
2395}
2396
2397
2398/*
2399 * Create a DXBC container with signature and shader code data blobs.
2400 */
2401static int dxbcCreateFromInfo(DXShaderInfo const *pInfo, void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2402{
2403 int rc;
2404
2405 /* Create a DXBC container with ISGN, OSGN and SHDR blobs. */
2406 uint32_t cBlob = 3;
2407 if ( pInfo->enmProgramType == VGPU10_HULL_SHADER
2408 || pInfo->enmProgramType == VGPU10_DOMAIN_SHADER)
2409 ++cBlob;
2410
2411 uint32_t const cbHdr = RT_UOFFSETOF_DYN(DXBCHeader, aBlobOffset[cBlob]); /* Header with blob offsets. */
2412 if (!dxbcByteWriterCanWrite(w, cbHdr))
2413 return VERR_NO_MEMORY;
2414
2415 /* Container header. */
2416 DXBCHeader *pHdr = (DXBCHeader *)dxbcByteWriterPtr(w);
2417 pHdr->u32DXBC = DXBC_MAGIC;
2418 // RT_ZERO(pHdr->au8Hash);
2419 pHdr->u32Version = 1;
2420 pHdr->cbTotal = cbHdr;
2421 pHdr->cBlob = cBlob;
2422 //RT_ZERO(pHdr->aBlobOffset);
2423 dxbcByteWriterCommit(w, cbHdr);
2424
2425#ifdef LOG_ENABLED
2426 if (pInfo->cInputSignature)
2427 {
2428 Log6(("Input signatures:\n"));
2429 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2430 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2431 i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask,
2432 pInfo->aInputSemantic[i].pcszSemanticName, pInfo->aInputSemantic[i].SemanticIndex));
2433 }
2434 if (pInfo->cOutputSignature)
2435 {
2436 Log6(("Output signatures:\n"));
2437 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2438 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2439 i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask,
2440 pInfo->aOutputSemantic[i].pcszSemanticName, pInfo->aOutputSemantic[i].SemanticIndex));
2441 }
2442 if (pInfo->cPatchConstantSignature)
2443 {
2444 Log6(("Patch constant signatures:\n"));
2445 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2446 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2447 i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask,
2448 pInfo->aPatchConstantSemantic[i].pcszSemanticName, pInfo->aPatchConstantSemantic[i].SemanticIndex));
2449 }
2450#endif
2451
2452 /* Blobs. */
2453 uint32_t iBlob = 0;
2454
2455 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2456 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_ISGN, pInfo->cInputSignature, &pInfo->aInputSignature[0], pInfo->aInputSemantic, w);
2457 AssertRCReturn(rc, rc);
2458
2459 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2460 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pInfo->aOutputSemantic, w);
2461 AssertRCReturn(rc, rc);
2462
2463 if ( pInfo->enmProgramType == VGPU10_HULL_SHADER
2464 || pInfo->enmProgramType == VGPU10_DOMAIN_SHADER)
2465 {
2466 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2467 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_PCSG, pInfo->cPatchConstantSignature, &pInfo->aPatchConstantSignature[0], pInfo->aPatchConstantSemantic, w);
2468 AssertRCReturn(rc, rc);
2469 }
2470
2471 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2472 rc = dxbcCreateSHDRBlob(pHdr, DXBC_BLOB_TYPE_SHDR, pvShader, cbShader, w);
2473 AssertRCReturn(rc, rc);
2474
2475 Assert(iBlob == cBlob);
2476
2477 AssertCompile(RT_UOFFSETOF(DXBCHeader, u32Version) == 0x14);
2478 dxbcHash(&pHdr->u32Version, pHdr->cbTotal - RT_UOFFSETOF(DXBCHeader, u32Version), pHdr->au8Hash);
2479
2480 return VINF_SUCCESS;
2481}
2482
2483
2484int DXShaderCreateDXBC(DXShaderInfo const *pInfo, void **ppvDXBC, uint32_t *pcbDXBC)
2485{
2486 /* Build DXBC container. */
2487 int rc;
2488 DXBCByteWriter dxbcByteWriter;
2489 DXBCByteWriter *w = &dxbcByteWriter;
2490 if (dxbcByteWriterInit(w, 4096 + pInfo->cbBytecode))
2491 {
2492 rc = dxbcCreateFromInfo(pInfo, pInfo->pvBytecode, pInfo->cbBytecode, w);
2493 if (RT_SUCCESS(rc))
2494 dxbcByteWriterFetchData(w, ppvDXBC, pcbDXBC);
2495 }
2496 else
2497 rc = VERR_NO_MEMORY;
2498 return rc;
2499}
2500
2501
2502static char const *dxbcGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, uint32_t u32BlobType,
2503 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature,
2504 SVGA3dDXSignatureSemanticName *pSemanticName)
2505{
2506 for (uint32_t i = 0; i < cSignature; ++i)
2507 {
2508 SVGA3dDXSignatureEntry const *p = &paSignature[i];
2509 if (p->registerIndex == idxRegister)
2510 {
2511 AssertReturn(p->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, NULL);
2512 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, p->semanticName, u32BlobType);
2513 *pSemanticName = p->semanticName;
2514 return pSemanticInfo->pszName;
2515 }
2516 }
2517 return NULL;
2518}
2519
2520char const *DXShaderGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, SVGA3dDXSignatureSemanticName *pSemanticName)
2521{
2522 return dxbcGetOutputSemanticName(pInfo, idxRegister, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pSemanticName);
2523}
2524
2525VGPU10_RESOURCE_RETURN_TYPE DXShaderResourceReturnTypeFromFormat(SVGA3dSurfaceFormat format)
2526{
2527 /** @todo This is auto-generated from format names and needs a review. */
2528 switch (format)
2529 {
2530 case SVGA3D_R32G32B32A32_UINT: return VGPU10_RETURN_TYPE_UINT;
2531 case SVGA3D_R32G32B32A32_SINT: return VGPU10_RETURN_TYPE_SINT;
2532 case SVGA3D_R32G32B32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2533 case SVGA3D_R32G32B32_UINT: return VGPU10_RETURN_TYPE_UINT;
2534 case SVGA3D_R32G32B32_SINT: return VGPU10_RETURN_TYPE_SINT;
2535 case SVGA3D_R16G16B16A16_UINT: return VGPU10_RETURN_TYPE_UINT;
2536 case SVGA3D_R16G16B16A16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2537 case SVGA3D_R16G16B16A16_SINT: return VGPU10_RETURN_TYPE_SINT;
2538 case SVGA3D_R32G32_UINT: return VGPU10_RETURN_TYPE_UINT;
2539 case SVGA3D_R32G32_SINT: return VGPU10_RETURN_TYPE_SINT;
2540 case SVGA3D_D32_FLOAT_S8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2541 case SVGA3D_R32_FLOAT_X8X24: return VGPU10_RETURN_TYPE_FLOAT;
2542 case SVGA3D_X32_G8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2543 case SVGA3D_R10G10B10A2_UINT: return VGPU10_RETURN_TYPE_UINT;
2544 case SVGA3D_R11G11B10_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2545 case SVGA3D_R8G8B8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2546 case SVGA3D_R8G8B8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2547 case SVGA3D_R8G8B8A8_UINT: return VGPU10_RETURN_TYPE_UINT;
2548 case SVGA3D_R8G8B8A8_SINT: return VGPU10_RETURN_TYPE_SINT;
2549 case SVGA3D_R16G16_UINT: return VGPU10_RETURN_TYPE_UINT;
2550 case SVGA3D_R16G16_SINT: return VGPU10_RETURN_TYPE_SINT;
2551 case SVGA3D_D32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2552 case SVGA3D_R32_UINT: return VGPU10_RETURN_TYPE_UINT;
2553 case SVGA3D_R32_SINT: return VGPU10_RETURN_TYPE_SINT;
2554 case SVGA3D_D24_UNORM_S8_UINT: return VGPU10_RETURN_TYPE_UNORM;
2555 case SVGA3D_R24_UNORM_X8: return VGPU10_RETURN_TYPE_UNORM;
2556 case SVGA3D_X24_G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2557 case SVGA3D_R8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2558 case SVGA3D_R8G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2559 case SVGA3D_R8G8_SINT: return VGPU10_RETURN_TYPE_SINT;
2560 case SVGA3D_R16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2561 case SVGA3D_R16_UINT: return VGPU10_RETURN_TYPE_UINT;
2562 case SVGA3D_R16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2563 case SVGA3D_R16_SINT: return VGPU10_RETURN_TYPE_SINT;
2564 case SVGA3D_R8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2565 case SVGA3D_R8_UINT: return VGPU10_RETURN_TYPE_UINT;
2566 case SVGA3D_R8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2567 case SVGA3D_R8_SINT: return VGPU10_RETURN_TYPE_SINT;
2568 case SVGA3D_R8G8_B8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2569 case SVGA3D_G8R8_G8B8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2570 case SVGA3D_BC1_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2571 case SVGA3D_BC2_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2572 case SVGA3D_BC3_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2573 case SVGA3D_BC4_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2574 case SVGA3D_BC5_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2575 case SVGA3D_R10G10B10_XR_BIAS_A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2576 case SVGA3D_B8G8R8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2577 case SVGA3D_B8G8R8X8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2578 case SVGA3D_R32G32B32A32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2579 case SVGA3D_R16G16B16A16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2580 case SVGA3D_R16G16B16A16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2581 case SVGA3D_R32G32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2582 case SVGA3D_R10G10B10A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2583 case SVGA3D_R8G8B8A8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2584 case SVGA3D_R16G16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2585 case SVGA3D_R16G16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2586 case SVGA3D_R16G16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2587 case SVGA3D_R32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2588 case SVGA3D_R8G8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2589 case SVGA3D_R16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2590 case SVGA3D_D16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2591 case SVGA3D_A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2592 case SVGA3D_BC1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2593 case SVGA3D_BC2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2594 case SVGA3D_BC3_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2595 case SVGA3D_B5G6R5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2596 case SVGA3D_B5G5R5A1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2597 case SVGA3D_B8G8R8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2598 case SVGA3D_B8G8R8X8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2599 case SVGA3D_BC4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2600 case SVGA3D_BC5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2601 case SVGA3D_B4G4R4A4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2602 case SVGA3D_BC7_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2603 case SVGA3D_BC7_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2604 case SVGA3D_R9G9B9E5_SHAREDEXP: return VGPU10_RETURN_TYPE_FLOAT;
2605 default:
2606 break;
2607 }
2608 return VGPU10_RETURN_TYPE_UNORM;
2609}
2610
2611int DXShaderUpdateResources(DXShaderInfo const *pInfo, VGPU10_RESOURCE_DIMENSION *paResourceDimension,
2612 VGPU10_RESOURCE_RETURN_TYPE *paResourceReturnType, uint32_t cResources)
2613{
2614 for (uint32_t i = 0; i < pInfo->cDclResource; ++i)
2615 {
2616 VGPU10_RESOURCE_DIMENSION const resourceDimension = i < cResources ? paResourceDimension[i] : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2617 AssertContinue(resourceDimension <= VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
2618
2619 VGPU10_RESOURCE_RETURN_TYPE const resourceReturnType = i < cResources ? paResourceReturnType[i] : VGPU10_RETURN_TYPE_FLOAT;
2620 AssertContinue(resourceReturnType <= VGPU10_RETURN_TYPE_MIXED);
2621
2622 uint32_t const offToken = pInfo->aOffDclResource[i];
2623 AssertContinue(offToken < pInfo->cbBytecode);
2624 if (offToken == 0) /* nothing at this index */
2625 continue;
2626
2627 uint32_t *paToken = (uint32_t *)((uintptr_t)pInfo->pvBytecode + offToken);
2628
2629 VGPU10OpcodeToken0 *pOpcode = (VGPU10OpcodeToken0 *)&paToken[0];
2630 pOpcode->resourceDimension = resourceDimension;
2631 // paToken[1] unmodified
2632 // paToken[2] unmodified
2633 VGPU10ResourceReturnTypeToken *pReturnTypeToken = (VGPU10ResourceReturnTypeToken *)&paToken[3];
2634 pReturnTypeToken->component0 = (uint8_t)resourceReturnType;
2635 pReturnTypeToken->component1 = (uint8_t)resourceReturnType;
2636 pReturnTypeToken->component2 = (uint8_t)resourceReturnType;
2637 pReturnTypeToken->component3 = (uint8_t)resourceReturnType;
2638 }
2639
2640 return VINF_SUCCESS;
2641}
2642
2643#ifdef DXBC_STANDALONE_TEST
2644static int dxbcCreateFromBytecode(void const *pvShaderCode, uint32_t cbShaderCode, void **ppvDXBC, uint32_t *pcbDXBC)
2645{
2646 /* Parse the shader bytecode and create DXBC container with resource, signature and shader bytecode blobs. */
2647 DXShaderInfo info;
2648 RT_ZERO(info);
2649 int rc = DXShaderParse(pvShaderCode, cbShaderCode, &info);
2650 if (RT_SUCCESS(rc))
2651 rc = DXShaderCreateDXBC(&info, ppvDXBC, pcbDXBC);
2652 return rc;
2653}
2654
2655static int parseShaderVM(void const *pvShaderCode, uint32_t cbShaderCode)
2656{
2657 void *pv = NULL;
2658 uint32_t cb = 0;
2659 int rc = dxbcCreateFromBytecode(pvShaderCode, cbShaderCode, &pv, &cb);
2660 if (RT_SUCCESS(rc))
2661 {
2662 /* Hexdump DXBC */
2663 printf("{\n");
2664 uint8_t *pu8 = (uint8_t *)pv;
2665 for (uint32_t i = 0; i < cb; ++i)
2666 {
2667 if ((i % 16) == 0)
2668 {
2669 if (i > 0)
2670 printf(",\n");
2671
2672 printf(" 0x%02x", pu8[i]);
2673 }
2674 else
2675 {
2676 printf(", 0x%02x", pu8[i]);
2677 }
2678 }
2679 printf("\n");
2680 printf("};\n");
2681
2682 RTMemFree(pv);
2683 }
2684
2685 return rc;
2686}
2687
2688static DXBCBlobHeader *dxbcFindBlob(DXBCHeader *pDXBCHeader, uint32_t u32BlobType)
2689{
2690 uint8_t const *pu8DXBCBegin = (uint8_t *)pDXBCHeader;
2691 for (uint32_t i = 0; i < pDXBCHeader->cBlob; ++i)
2692 {
2693 DXBCBlobHeader *pCurrentBlob = (DXBCBlobHeader *)&pu8DXBCBegin[pDXBCHeader->aBlobOffset[i]];
2694 if (pCurrentBlob->u32BlobType == u32BlobType)
2695 return pCurrentBlob;
2696 }
2697 return NULL;
2698}
2699
2700static int dxbcExtractShaderCode(DXBCHeader *pDXBCHeader, void **ppvCode, uint32_t *pcbCode)
2701{
2702 DXBCBlobHeader *pBlob = dxbcFindBlob(pDXBCHeader, DXBC_BLOB_TYPE_SHDR);
2703 AssertReturn(pBlob, VERR_NOT_IMPLEMENTED);
2704
2705 DXBCBlobSHDR *pSHDR = (DXBCBlobSHDR *)&pBlob[1];
2706 *pcbCode = pSHDR->cToken * 4;
2707 *ppvCode = RTMemAlloc(*pcbCode);
2708 AssertReturn(*ppvCode, VERR_NO_MEMORY);
2709
2710 memcpy(*ppvCode, pSHDR, *pcbCode);
2711 return VINF_SUCCESS;
2712}
2713
2714static int parseShaderDXBC(void const *pvDXBC)
2715{
2716 DXBCHeader *pDXBCHeader = (DXBCHeader *)pvDXBC;
2717 void *pvShaderCode = NULL;
2718 uint32_t cbShaderCode = 0;
2719 int rc = dxbcExtractShaderCode(pDXBCHeader, &pvShaderCode, &cbShaderCode);
2720 if (RT_SUCCESS(rc))
2721 {
2722 rc = parseShaderVM(pvShaderCode, cbShaderCode);
2723 RTMemFree(pvShaderCode);
2724 }
2725 return rc;
2726}
2727#endif /* DXBC_STANDALONE_TEST */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette