VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/DevVGA-SVGA3d-dx-shader.cpp@ 97572

Last change on this file since 97572 was 96450, checked in by vboxsync, 2 years ago

Devices/Graphics: set component types for pixel shader output signature. bugref:10013

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 126.0 KB
Line 
1/* $Id: DevVGA-SVGA3d-dx-shader.cpp 96450 2022-08-24 08:37:38Z vboxsync $ */
2/** @file
3 * DevVMWare - VMWare SVGA device - VGPU10+ (DX) shader utilities.
4 */
5
6/*
7 * Copyright (C) 2020-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
33#include <VBox/AssertGuest.h>
34#include <VBox/log.h>
35
36#include <iprt/asm.h>
37#include <iprt/md5.h>
38#include <iprt/mem.h>
39#include <iprt/sort.h>
40#include <iprt/string.h>
41
42#include "DevVGA-SVGA3d-dx-shader.h"
43
44#ifdef RT_OS_WINDOWS
45#include <d3d11TokenizedProgramFormat.hpp>
46#else
47#define D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM 2
48#define D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE 3
49#endif
50
51/*
52 *
53 * DXBC shader binary format definitions.
54 *
55 */
56
/*
 * DXBC container header.
 *
 * All blob offsets stored in aBlobOffset are relative to the first byte of
 * this structure. aBlobOffset is declared with a single element but holds
 * cBlob entries.
 */
typedef struct DXBCHeader
{
    uint32_t u32DXBC; /* 0x43425844 = 'D', 'X', 'B', 'C' */
    uint8_t au8Hash[16]; /* Modified MD5 hash. See dxbcHash. */
    uint32_t u32Version; /* 1 */
    uint32_t cbTotal; /* Total size in bytes, including this header. */
    uint32_t cBlob; /* Number of entries in the aBlobOffset array. */
    uint32_t aBlobOffset[1]; /* Offsets of the blobs from the start of the DXBC header. */
} DXBCHeader;
67
/* Magic value stored in DXBCHeader::u32DXBC. */
#define DXBC_MAGIC RT_MAKE_U32_FROM_U8('D', 'X', 'B', 'C')
69
/*
 * DXBC blob header.
 *
 * Precedes the data of every blob in the container.
 */
typedef struct DXBCBlobHeader
{
    uint32_t u32BlobType; /* FourCC code. DXBC_BLOB_TYPE_* */
    uint32_t cbBlob; /* Size of the blob data in bytes, excluding this header. 4 bytes aligned. */
    /* Followed by the blob's data. */
} DXBCBlobHeader;
77
/* DXBC blob types: FourCC codes for DXBCBlobHeader::u32BlobType. */
#define DXBC_BLOB_TYPE_ISGN RT_MAKE_U32_FROM_U8('I', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_OSGN RT_MAKE_U32_FROM_U8('O', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_PCSG RT_MAKE_U32_FROM_U8('P', 'C', 'S', 'G')
#define DXBC_BLOB_TYPE_SHDR RT_MAKE_U32_FROM_U8('S', 'H', 'D', 'R')
/** @todo More... */
84
/*
 * 'SHDR' blob data format.
 *
 * au32Token is declared with a single element; the actual number of
 * entries is cToken - 2, because cToken also counts programToken and
 * cToken itself.
 */
typedef struct DXBCBlobSHDR
{
    VGPU10ProgramToken programToken; /* First token of the program. */
    uint32_t cToken; /* Number of 32 bit tokens including programToken and cToken. */
    uint32_t au32Token[1]; /* The remaining (cToken - 2) tokens. */
} DXBCBlobSHDR;
92
/*
 * Element of an input or output signature.
 *
 * The union 'u' allows the component masks to be accessed either as
 * separate bit fields (u.m) or as one 32 bit value (u.mask).
 */
typedef struct DXBCBlobIOSGNElement
{
    uint32_t offElementName; /* Offset of the semantic's name relative to the start of the blob data. */
    uint32_t idxSemantic; /* Semantic index. */
    uint32_t enmSystemValue; /* SVGA3dDXSignatureSemanticName */
    uint32_t enmComponentType; /* 1 - unsigned, 2 - integer, 3 - float. */
    uint32_t idxRegister; /* Shader register index. Elements must be sorted by register index. */
    union
    {
        struct
        {
            uint32_t mask : 8; /* Component mask. Lower 4 bits represent X, Y, Z, W channels. */
            uint32_t mask2 : 8; /* Which components are used in the shader. */
            uint32_t pad : 16; /* Unused bits which fill the field up to 32 bits. */
        } m;
        uint32_t mask; /* All of the above as a single 32 bit value. */
    } u;
} DXBCBlobIOSGNElement;
112
/*
 * 'ISGN' and 'OSGN' blob data format.
 *
 * aElement is declared with a single element but holds cElement entries.
 */
typedef struct DXBCBlobIOSGN
{
    uint32_t cElement; /* Number of signature elements. */
    uint32_t offElement; /* Offset of the first element from the start of the blob. Equal to 8. */
    DXBCBlobIOSGNElement aElement[1]; /* Signature elements. Size is cElement. */
    /* Followed by ASCIIZ semantic names. */
} DXBCBlobIOSGN;
121
122
123/*
124 * VGPU10 shader parser definitions.
125 */
126
/*
 * Parsed info about an operand index.
 *
 * Which of iOperandImmediate and pOperandRelative is meaningful is
 * presumably selected by indexRepresentation; the code that fills the
 * structure in is outside this part of the file.
 */
typedef struct VGPUOperandIndex
{
    uint32_t indexRepresentation; /* VGPU10_OPERAND_INDEX_REPRESENTATION */
    uint64_t iOperandImmediate; /* Needs up to a qword. */
    struct VGPUOperand *pOperandRelative; /* For VGPU10_OPERAND_INDEX_*RELATIVE */
} VGPUOperandIndex;
134
/*
 * Parsed info about an operand.
 *
 * The bit fields hold values decoded from the operand token, while
 * paOperandToken and cOperandToken refer back to the raw tokens in the
 * input buffer.
 */
typedef struct VGPUOperand
{
    uint32_t numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */
    uint32_t selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
    uint32_t mask : 4; /* 4-bits X, Y, Z, W mask for VGPU10_OPERAND_4_COMPONENT_MASK_MODE. */
    uint32_t operandType : 8; /* VGPU10_OPERAND_TYPE */
    uint32_t indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */
    VGPUOperandIndex aOperandIndex[VGPU10_OPERAND_INDEX_3D]; /* Up to 3. */
    uint32_t aImm[4]; /* Immediate values for VGPU10_OPERAND_TYPE_IMMEDIATE* */
    uint32_t cOperandToken; /* Number of tokens in this operand. */
    uint32_t const *paOperandToken; /* Pointer to operand tokens in the input buffer. */
} VGPUOperand;
148
/*
 * Parsed info about an opcode.
 *
 * The operands live in aValOperand. That array also holds the operands
 * used for relative indexing, which is why it is larger than aIdxOperand;
 * aIdxOperand gives the positions of the instruction's own operands in it.
 */
typedef struct VGPUOpcode
{
    uint32_t cOpcodeToken; /* Number of tokens for this operation. */
    uint32_t opcodeType; /* VGPU10_OPCODE_* */
    uint32_t opcodeSubtype; /* For example VGPU10_VMWARE_OPCODE_* */
    uint32_t semanticName; /* SVGA3dDXSignatureSemanticName for system value declarations. */
    uint32_t cOperand; /* Number of operands for this instruction. */
    uint32_t aIdxOperand[8]; /* Indices of the instruction operands in the aValOperand array. */
                             /* 8 should be enough for everyone. */
    VGPUOperand aValOperand[16]; /* Operands including VGPU10_OPERAND_INDEX_*RELATIVE if they are used: */
                                 /* Operand1, VGPU10_OPERAND_INDEX_*RELATIVE for Operand1, ... */
                                 /* ... */
                                 /* OperandN, VGPU10_OPERAND_INDEX_*RELATIVE for OperandN, ... */
                                 /* 16 probably should be enough for everyone. */
    uint32_t const *paOpcodeToken; /* Pointer to opcode tokens in the input buffer. */
} VGPUOpcode;
166
/* Static, per opcode information. Element type of the g_aOpcodeInfo table. */
typedef struct VGPUOpcodeInfo
{
    uint32_t cOperand; /* Number of operands for this opcode. */
} VGPUOpcodeInfo;
171
/*
 * Number of operands of each VGPU10 opcode.
 *
 * There is one entry per opcode and the trailing comment of every entry
 * names the opcode it describes. The table is presumably indexed by the
 * VGPU10_OPCODE_* value, so the entries have to stay in the order of that
 * enumeration; the AssertCompile after the table verifies only the number
 * of entries, not their order.
 *
 * UINT32_MAX marks the special opcodes, which have no fixed operand count
 * in this table.
 */
static VGPUOpcodeInfo const g_aOpcodeInfo[] =
{
    { 3 }, /* VGPU10_OPCODE_ADD */
    { 3 }, /* VGPU10_OPCODE_AND */
    { 0 }, /* VGPU10_OPCODE_BREAK */
    { 1 }, /* VGPU10_OPCODE_BREAKC */
    { 1 }, /* VGPU10_OPCODE_CALL */
    { 2 }, /* VGPU10_OPCODE_CALLC */
    { 1 }, /* VGPU10_OPCODE_CASE */
    { 0 }, /* VGPU10_OPCODE_CONTINUE */
    { 1 }, /* VGPU10_OPCODE_CONTINUEC */
    { 0 }, /* VGPU10_OPCODE_CUT */
    { 0 }, /* VGPU10_OPCODE_DEFAULT */
    { 2 }, /* VGPU10_OPCODE_DERIV_RTX */
    { 2 }, /* VGPU10_OPCODE_DERIV_RTY */
    { 1 }, /* VGPU10_OPCODE_DISCARD */
    { 3 }, /* VGPU10_OPCODE_DIV */
    { 3 }, /* VGPU10_OPCODE_DP2 */
    { 3 }, /* VGPU10_OPCODE_DP3 */
    { 3 }, /* VGPU10_OPCODE_DP4 */
    { 0 }, /* VGPU10_OPCODE_ELSE */
    { 0 }, /* VGPU10_OPCODE_EMIT */
    { 0 }, /* VGPU10_OPCODE_EMITTHENCUT */
    { 0 }, /* VGPU10_OPCODE_ENDIF */
    { 0 }, /* VGPU10_OPCODE_ENDLOOP */
    { 0 }, /* VGPU10_OPCODE_ENDSWITCH */
    { 3 }, /* VGPU10_OPCODE_EQ */
    { 2 }, /* VGPU10_OPCODE_EXP */
    { 2 }, /* VGPU10_OPCODE_FRC */
    { 2 }, /* VGPU10_OPCODE_FTOI */
    { 2 }, /* VGPU10_OPCODE_FTOU */
    { 3 }, /* VGPU10_OPCODE_GE */
    { 3 }, /* VGPU10_OPCODE_IADD */
    { 1 }, /* VGPU10_OPCODE_IF */
    { 3 }, /* VGPU10_OPCODE_IEQ */
    { 3 }, /* VGPU10_OPCODE_IGE */
    { 3 }, /* VGPU10_OPCODE_ILT */
    { 4 }, /* VGPU10_OPCODE_IMAD */
    { 3 }, /* VGPU10_OPCODE_IMAX */
    { 3 }, /* VGPU10_OPCODE_IMIN */
    { 4 }, /* VGPU10_OPCODE_IMUL */
    { 3 }, /* VGPU10_OPCODE_INE */
    { 2 }, /* VGPU10_OPCODE_INEG */
    { 3 }, /* VGPU10_OPCODE_ISHL */
    { 3 }, /* VGPU10_OPCODE_ISHR */
    { 2 }, /* VGPU10_OPCODE_ITOF */
    { 1 }, /* VGPU10_OPCODE_LABEL */
    { 3 }, /* VGPU10_OPCODE_LD */
    { 4 }, /* VGPU10_OPCODE_LD_MS */
    { 2 }, /* VGPU10_OPCODE_LOG */
    { 0 }, /* VGPU10_OPCODE_LOOP */
    { 3 }, /* VGPU10_OPCODE_LT */
    { 4 }, /* VGPU10_OPCODE_MAD */
    { 3 }, /* VGPU10_OPCODE_MIN */
    { 3 }, /* VGPU10_OPCODE_MAX */
    { UINT32_MAX }, /* VGPU10_OPCODE_CUSTOMDATA: special opcode */
    { 2 }, /* VGPU10_OPCODE_MOV */
    { 4 }, /* VGPU10_OPCODE_MOVC */
    { 3 }, /* VGPU10_OPCODE_MUL */
    { 3 }, /* VGPU10_OPCODE_NE */
    { 0 }, /* VGPU10_OPCODE_NOP */
    { 2 }, /* VGPU10_OPCODE_NOT */
    { 3 }, /* VGPU10_OPCODE_OR */
    { 3 }, /* VGPU10_OPCODE_RESINFO */
    { 0 }, /* VGPU10_OPCODE_RET */
    { 1 }, /* VGPU10_OPCODE_RETC */
    { 2 }, /* VGPU10_OPCODE_ROUND_NE */
    { 2 }, /* VGPU10_OPCODE_ROUND_NI */
    { 2 }, /* VGPU10_OPCODE_ROUND_PI */
    { 2 }, /* VGPU10_OPCODE_ROUND_Z */
    { 2 }, /* VGPU10_OPCODE_RSQ */
    { 4 }, /* VGPU10_OPCODE_SAMPLE */
    { 5 }, /* VGPU10_OPCODE_SAMPLE_C */
    { 5 }, /* VGPU10_OPCODE_SAMPLE_C_LZ */
    { 5 }, /* VGPU10_OPCODE_SAMPLE_L */
    { 6 }, /* VGPU10_OPCODE_SAMPLE_D */
    { 5 }, /* VGPU10_OPCODE_SAMPLE_B */
    { 2 }, /* VGPU10_OPCODE_SQRT */
    { 1 }, /* VGPU10_OPCODE_SWITCH */
    { 3 }, /* VGPU10_OPCODE_SINCOS */
    { 4 }, /* VGPU10_OPCODE_UDIV */
    { 3 }, /* VGPU10_OPCODE_ULT */
    { 3 }, /* VGPU10_OPCODE_UGE */
    { 4 }, /* VGPU10_OPCODE_UMUL */
    { 4 }, /* VGPU10_OPCODE_UMAD */
    { 3 }, /* VGPU10_OPCODE_UMAX */
    { 3 }, /* VGPU10_OPCODE_UMIN */
    { 3 }, /* VGPU10_OPCODE_USHR */
    { 2 }, /* VGPU10_OPCODE_UTOF */
    { 3 }, /* VGPU10_OPCODE_XOR */
    { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE */
    { 1 }, /* VGPU10_OPCODE_DCL_CONSTANT_BUFFER */
    { 1 }, /* VGPU10_OPCODE_DCL_SAMPLER */
    { 1 }, /* VGPU10_OPCODE_DCL_INDEX_RANGE */
    { 0 }, /* VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY */
    { 0 }, /* VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE */
    { 0 }, /* VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT */
    { 1 }, /* VGPU10_OPCODE_DCL_INPUT */
    { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SGV */
    { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SIV */
    { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS */
    { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SGV */
    { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SIV */
    { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT */
    { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SGV */
    { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SIV */
    { 0 }, /* VGPU10_OPCODE_DCL_TEMPS */
    { 0 }, /* VGPU10_OPCODE_DCL_INDEXABLE_TEMP */
    { 0 }, /* VGPU10_OPCODE_DCL_GLOBAL_FLAGS */
    { UINT32_MAX }, /* VGPU10_OPCODE_VMWARE: special opcode */
    { 4 }, /* VGPU10_OPCODE_LOD */
    { 4 }, /* VGPU10_OPCODE_GATHER4 */
    { 3 }, /* VGPU10_OPCODE_SAMPLE_POS */
    { 2 }, /* VGPU10_OPCODE_SAMPLE_INFO */
    { UINT32_MAX }, /* VGPU10_OPCODE_RESERVED1: special opcode */
    { 0 }, /* VGPU10_OPCODE_HS_DECLS */
    { 0 }, /* VGPU10_OPCODE_HS_CONTROL_POINT_PHASE */
    { 0 }, /* VGPU10_OPCODE_HS_FORK_PHASE */
    { 0 }, /* VGPU10_OPCODE_HS_JOIN_PHASE */
    { 1 }, /* VGPU10_OPCODE_EMIT_STREAM */
    { 1 }, /* VGPU10_OPCODE_CUT_STREAM */
    { 1 }, /* VGPU10_OPCODE_EMITTHENCUT_STREAM */
    { 1 }, /* VGPU10_OPCODE_INTERFACE_CALL */
    { 2 }, /* VGPU10_OPCODE_BUFINFO */
    { 2 }, /* VGPU10_OPCODE_DERIV_RTX_COARSE */
    { 2 }, /* VGPU10_OPCODE_DERIV_RTX_FINE */
    { 2 }, /* VGPU10_OPCODE_DERIV_RTY_COARSE */
    { 2 }, /* VGPU10_OPCODE_DERIV_RTY_FINE */
    { 5 }, /* VGPU10_OPCODE_GATHER4_C */
    { 5 }, /* VGPU10_OPCODE_GATHER4_PO */
    { 6 }, /* VGPU10_OPCODE_GATHER4_PO_C */
    { 2 }, /* VGPU10_OPCODE_RCP */
    { 2 }, /* VGPU10_OPCODE_F32TOF16 */
    { 2 }, /* VGPU10_OPCODE_F16TOF32 */
    { 4 }, /* VGPU10_OPCODE_UADDC */
    { 4 }, /* VGPU10_OPCODE_USUBB */
    { 2 }, /* VGPU10_OPCODE_COUNTBITS */
    { 2 }, /* VGPU10_OPCODE_FIRSTBIT_HI */
    { 2 }, /* VGPU10_OPCODE_FIRSTBIT_LO */
    { 2 }, /* VGPU10_OPCODE_FIRSTBIT_SHI */
    { 4 }, /* VGPU10_OPCODE_UBFE */
    { 4 }, /* VGPU10_OPCODE_IBFE */
    { 5 }, /* VGPU10_OPCODE_BFI */
    { 2 }, /* VGPU10_OPCODE_BFREV */
    { 5 }, /* VGPU10_OPCODE_SWAPC */
    { 1 }, /* VGPU10_OPCODE_DCL_STREAM */
    { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_BODY */
    { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_TABLE */
    { 0 }, /* VGPU10_OPCODE_DCL_INTERFACE */
    { 0 }, /* VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT */
    { 0 }, /* VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT */
    { 0 }, /* VGPU10_OPCODE_DCL_TESS_DOMAIN */
    { 0 }, /* VGPU10_OPCODE_DCL_TESS_PARTITIONING */
    { 0 }, /* VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE */
    { 0 }, /* VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR */
    { 0 }, /* VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
    { 0 }, /* VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
    { 0 }, /* VGPU10_OPCODE_DCL_THREAD_GROUP */
    { 1 }, /* VGPU10_OPCODE_DCL_UAV_TYPED */
    { 1 }, /* VGPU10_OPCODE_DCL_UAV_RAW */
    { 1 }, /* VGPU10_OPCODE_DCL_UAV_STRUCTURED */
    { 1 }, /* VGPU10_OPCODE_DCL_TGSM_RAW */
    { 1 }, /* VGPU10_OPCODE_DCL_TGSM_STRUCTURED */
    { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_RAW */
    { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED */
    { 3 }, /* VGPU10_OPCODE_LD_UAV_TYPED */
    { 3 }, /* VGPU10_OPCODE_STORE_UAV_TYPED */
    { 3 }, /* VGPU10_OPCODE_LD_RAW */
    { 3 }, /* VGPU10_OPCODE_STORE_RAW */
    { 4 }, /* VGPU10_OPCODE_LD_STRUCTURED */
    { 4 }, /* VGPU10_OPCODE_STORE_STRUCTURED */
    { 3 }, /* VGPU10_OPCODE_ATOMIC_AND */
    { 3 }, /* VGPU10_OPCODE_ATOMIC_OR */
    { 3 }, /* VGPU10_OPCODE_ATOMIC_XOR */
    { 4 }, /* VGPU10_OPCODE_ATOMIC_CMP_STORE */
    { 3 }, /* VGPU10_OPCODE_ATOMIC_IADD */
    { 3 }, /* VGPU10_OPCODE_ATOMIC_IMAX */
    { 3 }, /* VGPU10_OPCODE_ATOMIC_IMIN */
    { 3 }, /* VGPU10_OPCODE_ATOMIC_UMAX */
    { 3 }, /* VGPU10_OPCODE_ATOMIC_UMIN */
    { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_ALLOC */
    { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_CONSUME */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IADD */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_AND */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_OR */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_XOR */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_EXCH */
    { 5 }, /* VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMAX */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMIN */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMAX */
    { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMIN */
    { 0 }, /* VGPU10_OPCODE_SYNC */
    { 3 }, /* VGPU10_OPCODE_DADD */
    { 3 }, /* VGPU10_OPCODE_DMAX */
    { 3 }, /* VGPU10_OPCODE_DMIN */
    { 3 }, /* VGPU10_OPCODE_DMUL */
    { 3 }, /* VGPU10_OPCODE_DEQ */
    { 3 }, /* VGPU10_OPCODE_DGE */
    { 3 }, /* VGPU10_OPCODE_DLT */
    { 3 }, /* VGPU10_OPCODE_DNE */
    { 2 }, /* VGPU10_OPCODE_DMOV */
    { 4 }, /* VGPU10_OPCODE_DMOVC */
    { 2 }, /* VGPU10_OPCODE_DTOF */
    { 2 }, /* VGPU10_OPCODE_FTOD */
    { 3 }, /* VGPU10_OPCODE_EVAL_SNAPPED */
    { 3 }, /* VGPU10_OPCODE_EVAL_SAMPLE_INDEX */
    { 2 }, /* VGPU10_OPCODE_EVAL_CENTROID */
    { 0 }, /* VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT */
    { 0 }, /* VGPU10_OPCODE_ABORT */
    { 0 }, /* VGPU10_OPCODE_DEBUG_BREAK */
    { 0 }, /* VGPU10_OPCODE_RESERVED0 */
    { 3 }, /* VGPU10_OPCODE_DDIV */
    { 4 }, /* VGPU10_OPCODE_DFMA */
    { 2 }, /* VGPU10_OPCODE_DRCP */
    { 4 }, /* VGPU10_OPCODE_MSAD */
    { 2 }, /* VGPU10_OPCODE_DTOI */
    { 2 }, /* VGPU10_OPCODE_DTOU */
    { 2 }, /* VGPU10_OPCODE_ITOD */
    { 2 }, /* VGPU10_OPCODE_UTOD */
};
/* Fails the build if the table and the opcode enumeration differ in the number of entries. */
AssertCompile(RT_ELEMENTS(g_aOpcodeInfo) == VGPU10_NUM_OPCODES);
394
395#ifdef LOG_ENABLED
396/*
397 *
398 * Helpers to translate a VGPU10 shader constant to a string.
399 *
400 */
401
/*
 * Expands to a 'case' label which returns the identifier passed as 'idx'
 * as a string literal. To be used inside a switch statement of a function
 * returning 'const char *'.
 */
#define SVGA_CASE_ID2STR(idx) case idx: return #idx
403
404static const char *dxbcOpcodeToString(uint32_t opcodeType)
405{
406 VGPU10_OPCODE_TYPE enm = (VGPU10_OPCODE_TYPE)opcodeType;
407 switch (enm)
408 {
409 SVGA_CASE_ID2STR(VGPU10_OPCODE_ADD);
410 SVGA_CASE_ID2STR(VGPU10_OPCODE_AND);
411 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAK);
412 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAKC);
413 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALL);
414 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALLC);
415 SVGA_CASE_ID2STR(VGPU10_OPCODE_CASE);
416 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUE);
417 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUEC);
418 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT);
419 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEFAULT);
420 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX);
421 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY);
422 SVGA_CASE_ID2STR(VGPU10_OPCODE_DISCARD);
423 SVGA_CASE_ID2STR(VGPU10_OPCODE_DIV);
424 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP2);
425 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP3);
426 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP4);
427 SVGA_CASE_ID2STR(VGPU10_OPCODE_ELSE);
428 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT);
429 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT);
430 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDIF);
431 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDLOOP);
432 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDSWITCH);
433 SVGA_CASE_ID2STR(VGPU10_OPCODE_EQ);
434 SVGA_CASE_ID2STR(VGPU10_OPCODE_EXP);
435 SVGA_CASE_ID2STR(VGPU10_OPCODE_FRC);
436 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOI);
437 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOU);
438 SVGA_CASE_ID2STR(VGPU10_OPCODE_GE);
439 SVGA_CASE_ID2STR(VGPU10_OPCODE_IADD);
440 SVGA_CASE_ID2STR(VGPU10_OPCODE_IF);
441 SVGA_CASE_ID2STR(VGPU10_OPCODE_IEQ);
442 SVGA_CASE_ID2STR(VGPU10_OPCODE_IGE);
443 SVGA_CASE_ID2STR(VGPU10_OPCODE_ILT);
444 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAD);
445 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAX);
446 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMIN);
447 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMUL);
448 SVGA_CASE_ID2STR(VGPU10_OPCODE_INE);
449 SVGA_CASE_ID2STR(VGPU10_OPCODE_INEG);
450 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHL);
451 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHR);
452 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOF);
453 SVGA_CASE_ID2STR(VGPU10_OPCODE_LABEL);
454 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD);
455 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_MS);
456 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOG);
457 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOOP);
458 SVGA_CASE_ID2STR(VGPU10_OPCODE_LT);
459 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAD);
460 SVGA_CASE_ID2STR(VGPU10_OPCODE_MIN);
461 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAX);
462 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUSTOMDATA);
463 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOV);
464 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOVC);
465 SVGA_CASE_ID2STR(VGPU10_OPCODE_MUL);
466 SVGA_CASE_ID2STR(VGPU10_OPCODE_NE);
467 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOP);
468 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOT);
469 SVGA_CASE_ID2STR(VGPU10_OPCODE_OR);
470 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESINFO);
471 SVGA_CASE_ID2STR(VGPU10_OPCODE_RET);
472 SVGA_CASE_ID2STR(VGPU10_OPCODE_RETC);
473 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NE);
474 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NI);
475 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_PI);
476 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_Z);
477 SVGA_CASE_ID2STR(VGPU10_OPCODE_RSQ);
478 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE);
479 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C);
480 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C_LZ);
481 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_L);
482 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_D);
483 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_B);
484 SVGA_CASE_ID2STR(VGPU10_OPCODE_SQRT);
485 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWITCH);
486 SVGA_CASE_ID2STR(VGPU10_OPCODE_SINCOS);
487 SVGA_CASE_ID2STR(VGPU10_OPCODE_UDIV);
488 SVGA_CASE_ID2STR(VGPU10_OPCODE_ULT);
489 SVGA_CASE_ID2STR(VGPU10_OPCODE_UGE);
490 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMUL);
491 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAD);
492 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAX);
493 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMIN);
494 SVGA_CASE_ID2STR(VGPU10_OPCODE_USHR);
495 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOF);
496 SVGA_CASE_ID2STR(VGPU10_OPCODE_XOR);
497 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE);
498 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_CONSTANT_BUFFER);
499 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_SAMPLER);
500 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEX_RANGE);
501 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY);
502 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE);
503 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT);
504 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT);
505 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SGV);
506 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SIV);
507 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS);
508 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SGV);
509 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SIV);
510 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT);
511 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SGV);
512 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SIV);
513 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TEMPS);
514 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEXABLE_TEMP);
515 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GLOBAL_FLAGS);
516 SVGA_CASE_ID2STR(VGPU10_OPCODE_VMWARE);
517 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOD);
518 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4);
519 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_POS);
520 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_INFO);
521 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED1);
522 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_DECLS);
523 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_CONTROL_POINT_PHASE);
524 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_FORK_PHASE);
525 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_JOIN_PHASE);
526 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT_STREAM);
527 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT_STREAM);
528 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT_STREAM);
529 SVGA_CASE_ID2STR(VGPU10_OPCODE_INTERFACE_CALL);
530 SVGA_CASE_ID2STR(VGPU10_OPCODE_BUFINFO);
531 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_COARSE);
532 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_FINE);
533 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_COARSE);
534 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_FINE);
535 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_C);
536 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO);
537 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO_C);
538 SVGA_CASE_ID2STR(VGPU10_OPCODE_RCP);
539 SVGA_CASE_ID2STR(VGPU10_OPCODE_F32TOF16);
540 SVGA_CASE_ID2STR(VGPU10_OPCODE_F16TOF32);
541 SVGA_CASE_ID2STR(VGPU10_OPCODE_UADDC);
542 SVGA_CASE_ID2STR(VGPU10_OPCODE_USUBB);
543 SVGA_CASE_ID2STR(VGPU10_OPCODE_COUNTBITS);
544 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_HI);
545 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_LO);
546 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_SHI);
547 SVGA_CASE_ID2STR(VGPU10_OPCODE_UBFE);
548 SVGA_CASE_ID2STR(VGPU10_OPCODE_IBFE);
549 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFI);
550 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFREV);
551 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWAPC);
552 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_STREAM);
553 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_BODY);
554 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_TABLE);
555 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INTERFACE);
556 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT);
557 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT);
558 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_DOMAIN);
559 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_PARTITIONING);
560 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE);
561 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR);
562 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT);
563 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT);
564 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_THREAD_GROUP);
565 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_TYPED);
566 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_RAW);
567 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_STRUCTURED);
568 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_RAW);
569 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_STRUCTURED);
570 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_RAW);
571 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED);
572 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_UAV_TYPED);
573 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_UAV_TYPED);
574 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_RAW);
575 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_RAW);
576 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_STRUCTURED);
577 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_STRUCTURED);
578 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_AND);
579 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_OR);
580 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_XOR);
581 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_CMP_STORE);
582 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IADD);
583 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMAX);
584 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMIN);
585 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMAX);
586 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMIN);
587 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_ALLOC);
588 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CONSUME);
589 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IADD);
590 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_AND);
591 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_OR);
592 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_XOR);
593 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_EXCH);
594 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
595 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMAX);
596 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMIN);
597 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMAX);
598 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMIN);
599 SVGA_CASE_ID2STR(VGPU10_OPCODE_SYNC);
600 SVGA_CASE_ID2STR(VGPU10_OPCODE_DADD);
601 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMAX);
602 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMIN);
603 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMUL);
604 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEQ);
605 SVGA_CASE_ID2STR(VGPU10_OPCODE_DGE);
606 SVGA_CASE_ID2STR(VGPU10_OPCODE_DLT);
607 SVGA_CASE_ID2STR(VGPU10_OPCODE_DNE);
608 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOV);
609 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOVC);
610 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOF);
611 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOD);
612 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SNAPPED);
613 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SAMPLE_INDEX);
614 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_CENTROID);
615 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT);
616 SVGA_CASE_ID2STR(VGPU10_OPCODE_ABORT);
617 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEBUG_BREAK);
618 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED0);
619 SVGA_CASE_ID2STR(VGPU10_OPCODE_DDIV);
620 SVGA_CASE_ID2STR(VGPU10_OPCODE_DFMA);
621 SVGA_CASE_ID2STR(VGPU10_OPCODE_DRCP);
622 SVGA_CASE_ID2STR(VGPU10_OPCODE_MSAD);
623 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOI);
624 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOU);
625 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOD);
626 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOD);
627 SVGA_CASE_ID2STR(VGPU10_NUM_OPCODES);
628 }
629 return NULL;
630}
631
632
633static const char *dxbcShaderTypeToString(uint32_t value)
634{
635 VGPU10_PROGRAM_TYPE enm = (VGPU10_PROGRAM_TYPE)value;
636 switch (enm)
637 {
638 SVGA_CASE_ID2STR(VGPU10_PIXEL_SHADER);
639 SVGA_CASE_ID2STR(VGPU10_VERTEX_SHADER);
640 SVGA_CASE_ID2STR(VGPU10_GEOMETRY_SHADER);
641 SVGA_CASE_ID2STR(VGPU10_HULL_SHADER);
642 SVGA_CASE_ID2STR(VGPU10_DOMAIN_SHADER);
643 SVGA_CASE_ID2STR(VGPU10_COMPUTE_SHADER);
644 }
645 return NULL;
646}
647
648
649static const char *dxbcCustomDataClassToString(uint32_t value)
650{
651 VGPU10_CUSTOMDATA_CLASS enm = (VGPU10_CUSTOMDATA_CLASS)value;
652 switch (enm)
653 {
654 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_COMMENT);
655 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DEBUGINFO);
656 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_OPAQUE);
657 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER);
658 }
659 return NULL;
660}
661
662
663static const char *dxbcSystemNameToString(uint32_t value)
664{
665 VGPU10_SYSTEM_NAME enm = (VGPU10_SYSTEM_NAME)value;
666 switch (enm)
667 {
668 SVGA_CASE_ID2STR(VGPU10_NAME_UNDEFINED);
669 SVGA_CASE_ID2STR(VGPU10_NAME_POSITION);
670 SVGA_CASE_ID2STR(VGPU10_NAME_CLIP_DISTANCE);
671 SVGA_CASE_ID2STR(VGPU10_NAME_CULL_DISTANCE);
672 SVGA_CASE_ID2STR(VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX);
673 SVGA_CASE_ID2STR(VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
674 SVGA_CASE_ID2STR(VGPU10_NAME_VERTEX_ID);
675 SVGA_CASE_ID2STR(VGPU10_NAME_PRIMITIVE_ID);
676 SVGA_CASE_ID2STR(VGPU10_NAME_INSTANCE_ID);
677 SVGA_CASE_ID2STR(VGPU10_NAME_IS_FRONT_FACE);
678 SVGA_CASE_ID2STR(VGPU10_NAME_SAMPLE_INDEX);
679 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR);
680 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR);
681 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR);
682 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR);
683 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
684 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
685 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR);
686 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR);
687 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR);
688 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
689 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
690 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
691 }
692 return NULL;
693}
694
695
696static const char *dxbcOperandTypeToString(uint32_t value)
697{
698 VGPU10_OPERAND_TYPE enm = (VGPU10_OPERAND_TYPE)value;
699 switch (enm)
700 {
701 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_TEMP);
702 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT);
703 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT);
704 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INDEXABLE_TEMP);
705 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE32);
706 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE64);
707 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_SAMPLER);
708 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RESOURCE);
709 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CONSTANT_BUFFER);
710 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER);
711 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_LABEL);
712 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
713 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH);
714 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_NULL);
715 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RASTERIZER);
716 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK);
717 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_STREAM);
718 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_BODY);
719 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_TABLE);
720 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INTERFACE);
721 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_INPUT);
722 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_OUTPUT);
723 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID);
724 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID);
725 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID);
726 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT);
727 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT);
728 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
729 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT);
730 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THIS_POINTER);
731 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_UAV);
732 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
733 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID);
734 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID);
735 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
736 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK);
737 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED);
738 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID);
739 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL);
740 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL);
741 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CYCLE_COUNTER);
742 SVGA_CASE_ID2STR(VGPU10_NUM_OPERANDS);
743 }
744 return NULL;
745}
746
747
748static const char *dxbcOperandNumComponentsToString(uint32_t value)
749{
750 VGPU10_OPERAND_NUM_COMPONENTS enm = (VGPU10_OPERAND_NUM_COMPONENTS)value;
751 switch (enm)
752 {
753 SVGA_CASE_ID2STR(VGPU10_OPERAND_0_COMPONENT);
754 SVGA_CASE_ID2STR(VGPU10_OPERAND_1_COMPONENT);
755 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT);
756 SVGA_CASE_ID2STR(VGPU10_OPERAND_N_COMPONENT);
757 }
758 return NULL;
759}
760
761
762static const char *dxbcOperandComponentModeToString(uint32_t value)
763{
764 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE enm = (VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE)value;
765 switch (enm)
766 {
767 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
768 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE);
769 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE);
770 }
771 return NULL;
772}
773
774
775static const char *dxbcOperandComponentNameToString(uint32_t value)
776{
777 VGPU10_COMPONENT_NAME enm = (VGPU10_COMPONENT_NAME)value;
778 switch (enm)
779 {
780 SVGA_CASE_ID2STR(VGPU10_COMPONENT_X);
781 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Y);
782 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Z);
783 SVGA_CASE_ID2STR(VGPU10_COMPONENT_W);
784 }
785 return NULL;
786}
787
788
789static const char *dxbcOperandIndexDimensionToString(uint32_t value)
790{
791 VGPU10_OPERAND_INDEX_DIMENSION enm = (VGPU10_OPERAND_INDEX_DIMENSION)value;
792 switch (enm)
793 {
794 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_0D);
795 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_1D);
796 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_2D);
797 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_3D);
798 }
799 return NULL;
800}
801
802
803static const char *dxbcOperandIndexRepresentationToString(uint32_t value)
804{
805 VGPU10_OPERAND_INDEX_REPRESENTATION enm = (VGPU10_OPERAND_INDEX_REPRESENTATION)value;
806 switch (enm)
807 {
808 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32);
809 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64);
810 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_RELATIVE);
811 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
812 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE);
813 }
814 return NULL;
815}
816
817
818static const char *dxbcInterpolationModeToString(uint32_t value)
819{
820 VGPU10_INTERPOLATION_MODE enm = (VGPU10_INTERPOLATION_MODE)value;
821 switch (enm)
822 {
823 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_UNDEFINED);
824 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_CONSTANT);
825 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR);
826 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_CENTROID);
827 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE);
828 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
829 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_SAMPLE);
830 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
831 }
832 return NULL;
833}
834
835
836static const char *dxbcResourceDimensionToString(uint32_t value)
837{
838 VGPU10_RESOURCE_DIMENSION enm = (VGPU10_RESOURCE_DIMENSION)value;
839 switch (enm)
840 {
841 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_UNKNOWN);
842 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_BUFFER);
843 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1D);
844 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2D);
845 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS);
846 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE3D);
847 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
848 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY);
849 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY);
850 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY);
851 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
852 }
853 return NULL;
854}
855
856
857static const char *dxbcVmwareOpcodeTypeToString(uint32_t value)
858{
859 VGPU10_VMWARE_OPCODE_TYPE enm = (VGPU10_VMWARE_OPCODE_TYPE)value;
860 switch (enm)
861 {
862 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_IDIV);
863 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DFRC);
864 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DRSQ);
865 SVGA_CASE_ID2STR(VGPU10_VMWARE_NUM_OPCODES);
866 }
867 return NULL;
868}
869
870#endif /* LOG_ENABLED */
871
872/*
873 * MD5 from IPRT (alt-md5.cpp) for DXBC hash calculation.
874 * DXBC hash function uses a different padding for the data, see dxbcHash.
 * Therefore RTMd5Final is not needed. Two functions have been renamed: dxbcRTMd5Update and dxbcRTMd5Init.
876 */
877
878
/*
 * The four core MD5 mixing functions - F1 is optimized somewhat.
 * The textbook form of F1 is (x & y | ~x & z).
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "a | b") expand with the intended precedence.
 */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))


/*
 * This is the central step in the MD5 algorithm:
 * w = x + rotl32(w + f(x, y, z) + data, s).
 * 'w' is evaluated several times and must be a side-effect free lvalue;
 * 's' must be in the range 1..31.
 */
#define MD5STEP(f, w, x, y, z, data, s) \
    ( (w) += f(x, y, z) + (data), (w) = (w) << (s) | (w) >> (32 - (s)), (w) += (x) )
890
891
892/**
893 * The core of the MD5 algorithm, this alters an existing MD5 hash to reflect
894 * the addition of 16 longwords of new data. RTMd5Update blocks the data and
895 * converts bytes into longwords for this routine.
896 */
897static void rtMd5Transform(uint32_t buf[4], uint32_t const in[16])
898{
899 uint32_t a, b, c, d;
900
901 a = buf[0];
902 b = buf[1];
903 c = buf[2];
904 d = buf[3];
905
906 /* fn, w, x, y, z, data, s) */
907 MD5STEP(F1, a, b, c, d, in[ 0] + 0xd76aa478, 7);
908 MD5STEP(F1, d, a, b, c, in[ 1] + 0xe8c7b756, 12);
909 MD5STEP(F1, c, d, a, b, in[ 2] + 0x242070db, 17);
910 MD5STEP(F1, b, c, d, a, in[ 3] + 0xc1bdceee, 22);
911 MD5STEP(F1, a, b, c, d, in[ 4] + 0xf57c0faf, 7);
912 MD5STEP(F1, d, a, b, c, in[ 5] + 0x4787c62a, 12);
913 MD5STEP(F1, c, d, a, b, in[ 6] + 0xa8304613, 17);
914 MD5STEP(F1, b, c, d, a, in[ 7] + 0xfd469501, 22);
915 MD5STEP(F1, a, b, c, d, in[ 8] + 0x698098d8, 7);
916 MD5STEP(F1, d, a, b, c, in[ 9] + 0x8b44f7af, 12);
917 MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
918 MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
919 MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
920 MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
921 MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
922 MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
923
924 MD5STEP(F2, a, b, c, d, in[ 1] + 0xf61e2562, 5);
925 MD5STEP(F2, d, a, b, c, in[ 6] + 0xc040b340, 9);
926 MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
927 MD5STEP(F2, b, c, d, a, in[ 0] + 0xe9b6c7aa, 20);
928 MD5STEP(F2, a, b, c, d, in[ 5] + 0xd62f105d, 5);
929 MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
930 MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
931 MD5STEP(F2, b, c, d, a, in[ 4] + 0xe7d3fbc8, 20);
932 MD5STEP(F2, a, b, c, d, in[ 9] + 0x21e1cde6, 5);
933 MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
934 MD5STEP(F2, c, d, a, b, in[ 3] + 0xf4d50d87, 14);
935 MD5STEP(F2, b, c, d, a, in[ 8] + 0x455a14ed, 20);
936 MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
937 MD5STEP(F2, d, a, b, c, in[ 2] + 0xfcefa3f8, 9);
938 MD5STEP(F2, c, d, a, b, in[ 7] + 0x676f02d9, 14);
939 MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
940
941 MD5STEP(F3, a, b, c, d, in[ 5] + 0xfffa3942, 4);
942 MD5STEP(F3, d, a, b, c, in[ 8] + 0x8771f681, 11);
943 MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
944 MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
945 MD5STEP(F3, a, b, c, d, in[ 1] + 0xa4beea44, 4);
946 MD5STEP(F3, d, a, b, c, in[ 4] + 0x4bdecfa9, 11);
947 MD5STEP(F3, c, d, a, b, in[ 7] + 0xf6bb4b60, 16);
948 MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
949 MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
950 MD5STEP(F3, d, a, b, c, in[ 0] + 0xeaa127fa, 11);
951 MD5STEP(F3, c, d, a, b, in[ 3] + 0xd4ef3085, 16);
952 MD5STEP(F3, b, c, d, a, in[ 6] + 0x04881d05, 23);
953 MD5STEP(F3, a, b, c, d, in[ 9] + 0xd9d4d039, 4);
954 MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
955 MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
956 MD5STEP(F3, b, c, d, a, in[ 2] + 0xc4ac5665, 23);
957
958 MD5STEP(F4, a, b, c, d, in[ 0] + 0xf4292244, 6);
959 MD5STEP(F4, d, a, b, c, in[ 7] + 0x432aff97, 10);
960 MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
961 MD5STEP(F4, b, c, d, a, in[ 5] + 0xfc93a039, 21);
962 MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
963 MD5STEP(F4, d, a, b, c, in[ 3] + 0x8f0ccc92, 10);
964 MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
965 MD5STEP(F4, b, c, d, a, in[ 1] + 0x85845dd1, 21);
966 MD5STEP(F4, a, b, c, d, in[ 8] + 0x6fa87e4f, 6);
967 MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
968 MD5STEP(F4, c, d, a, b, in[ 6] + 0xa3014314, 15);
969 MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
970 MD5STEP(F4, a, b, c, d, in[ 4] + 0xf7537e82, 6);
971 MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
972 MD5STEP(F4, c, d, a, b, in[ 2] + 0x2ad7d2bb, 15);
973 MD5STEP(F4, b, c, d, a, in[ 9] + 0xeb86d391, 21);
974
975 buf[0] += a;
976 buf[1] += b;
977 buf[2] += c;
978 buf[3] += d;
979}
980
981
#ifdef RT_BIG_ENDIAN
/*
 * Converts 'longs' 32-bit words at 'buf' in place using RT_LE2H_U32.
 * At least one word is always converted (do/while), so 'longs' must not be 0.
 *
 * Note: this code is harmless on little-endian machines.
 */
static void rtMd5ByteReverse(uint32_t *buf, unsigned int longs)
{
    uint32_t    *pu32Word = buf;
    unsigned int cLeft    = longs;
    do
    {
        uint32_t const u32 = *pu32Word;
        *pu32Word = RT_LE2H_U32(u32);
        ++pu32Word;
    } while (--cLeft);
}
#else  /* little endian - do nothing */
# define rtMd5ByteReverse(buf, len) do { /* Nothing */ } while (0)
#endif
1000
1001
1002/*
1003 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
1004 * initialization constants.
1005 */
1006static void dxbcRTMd5Init(PRTMD5CONTEXT pCtx)
1007{
1008 pCtx->AltPrivate.buf[0] = 0x67452301;
1009 pCtx->AltPrivate.buf[1] = 0xefcdab89;
1010 pCtx->AltPrivate.buf[2] = 0x98badcfe;
1011 pCtx->AltPrivate.buf[3] = 0x10325476;
1012
1013 pCtx->AltPrivate.bits[0] = 0;
1014 pCtx->AltPrivate.bits[1] = 0;
1015}
1016
1017
1018/*
1019 * Update context to reflect the concatenation of another buffer full
1020 * of bytes.
1021 */
1022/** @todo Optimize this, because len is always a multiple of 64. */
1023static void dxbcRTMd5Update(PRTMD5CONTEXT pCtx, const void *pvBuf, size_t len)
1024{
1025 const uint8_t *buf = (const uint8_t *)pvBuf;
1026 uint32_t t;
1027
1028 /* Update bitcount */
1029 t = pCtx->AltPrivate.bits[0];
1030 if ((pCtx->AltPrivate.bits[0] = t + ((uint32_t) len << 3)) < t)
1031 pCtx->AltPrivate.bits[1]++; /* Carry from low to high */
1032 pCtx->AltPrivate.bits[1] += (uint32_t)(len >> 29);
1033
1034 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
1035
1036 /* Handle any leading odd-sized chunks */
1037 if (t)
1038 {
1039 uint8_t *p = (uint8_t *) pCtx->AltPrivate.in + t;
1040
1041 t = 64 - t;
1042 if (len < t)
1043 {
1044 memcpy(p, buf, len);
1045 return;
1046 }
1047 memcpy(p, buf, t);
1048 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1049 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1050 buf += t;
1051 len -= t;
1052 }
1053
1054 /* Process data in 64-byte chunks */
1055#ifndef RT_BIG_ENDIAN
1056 if (!((uintptr_t)buf & 0x3))
1057 {
1058 while (len >= 64) {
1059 rtMd5Transform(pCtx->AltPrivate.buf, (uint32_t const *)buf);
1060 buf += 64;
1061 len -= 64;
1062 }
1063 }
1064 else
1065#endif
1066 {
1067 while (len >= 64) {
1068 memcpy(pCtx->AltPrivate.in, buf, 64);
1069 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1070 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1071 buf += 64;
1072 len -= 64;
1073 }
1074 }
1075
1076 /* Handle any remaining bytes of data */
1077 memcpy(pCtx->AltPrivate.in, buf, len);
1078}
1079
1080
1081static void dxbcHash(void const *pvData, uint32_t cbData, uint8_t pabDigest[RTMD5HASHSIZE])
1082{
1083 size_t const kBlockSize = 64;
1084 uint8_t au8BlockBuffer[kBlockSize];
1085
1086 static uint8_t const s_au8Padding[kBlockSize] =
1087 {
1088 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1089 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1090 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1091 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1092 };
1093
1094 RTMD5CONTEXT Ctx;
1095 PRTMD5CONTEXT const pCtx = &Ctx;
1096 dxbcRTMd5Init(pCtx);
1097
1098 uint8_t const *pu8Data = (uint8_t *)pvData;
1099 size_t cbRemaining = cbData;
1100
1101 size_t const cbCompleteBlocks = cbData & ~ (kBlockSize - 1);
1102 dxbcRTMd5Update(pCtx, pu8Data, cbCompleteBlocks);
1103 pu8Data += cbCompleteBlocks;
1104 cbRemaining -= cbCompleteBlocks;
1105
1106 /* Custom padding. */
1107 if (cbRemaining >= kBlockSize - 2 * sizeof(uint32_t))
1108 {
1109 /* Two additional blocks. */
1110 memcpy(&au8BlockBuffer[0], pu8Data, cbRemaining);
1111 memcpy(&au8BlockBuffer[cbRemaining], s_au8Padding, kBlockSize - cbRemaining);
1112 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1113
1114 memset(&au8BlockBuffer[sizeof(uint32_t)], 0, kBlockSize - 2 * sizeof(uint32_t));
1115 }
1116 else
1117 {
1118 /* One additional block. */
1119 memcpy(&au8BlockBuffer[sizeof(uint32_t)], pu8Data, cbRemaining);
1120 memcpy(&au8BlockBuffer[sizeof(uint32_t) + cbRemaining], s_au8Padding, kBlockSize - cbRemaining - 2 * sizeof(uint32_t));
1121 }
1122
1123 /* Set the first and last dwords of the last block. */
1124 *(uint32_t *)&au8BlockBuffer[0] = cbData << 3;
1125 *(uint32_t *)&au8BlockBuffer[kBlockSize - sizeof(uint32_t)] = (cbData << 1) | 1;
1126 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1127
1128 AssertCompile(sizeof(pCtx->AltPrivate.buf) == RTMD5HASHSIZE);
1129 memcpy(pabDigest, pCtx->AltPrivate.buf, RTMD5HASHSIZE);
1130}
1131
1132
1133/*
1134 *
1135 * Shader token reader.
1136 *
1137 */
1138
/** Read cursor over an array of 32-bit shader tokens. */
typedef struct DXBCTokenReader
{
    /** Next token to read. */
    uint32_t const *pToken;
    /** How many tokens total. */
    uint32_t        cToken;
    /** How many tokens remain. */
    uint32_t        cRemainingToken;
} DXBCTokenReader;
1145
1146
#ifdef LOG_ENABLED
/** Returns the size in bytes (4 per token) of the tokens consumed so far; logging only. */
DECLINLINE(uint32_t) dxbcTokenReaderByteOffset(DXBCTokenReader *r)
{
    uint32_t const cTokensConsumed = r->cToken - r->cRemainingToken;
    return cTokensConsumed * 4;
}
#endif
1153
1154
#if 0 // Unused for now
/** Returns the number of tokens which have not been read yet. */
DECLINLINE(uint32_t) dxbcTokenReaderRemaining(DXBCTokenReader *r)
{
    uint32_t const cTokensLeft = r->cRemainingToken;
    return cTokensLeft;
}
#endif
1161
1162
1163DECLINLINE(uint32_t const *) dxbcTokenReaderPtr(DXBCTokenReader *r)
1164{
1165 return r->pToken;
1166}
1167
1168
1169DECLINLINE(bool) dxbcTokenReaderCanRead(DXBCTokenReader *r, uint32_t cToken)
1170{
1171 return cToken <= r->cRemainingToken;
1172}
1173
1174
1175DECLINLINE(void) dxbcTokenReaderSkip(DXBCTokenReader *r, uint32_t cToken)
1176{
1177 AssertReturnVoid(r->cRemainingToken >= cToken);
1178 r->cRemainingToken -= cToken;
1179 r->pToken += cToken;
1180}
1181
1182
1183DECLINLINE(uint32_t) dxbcTokenReaderRead32(DXBCTokenReader *r)
1184{
1185 AssertReturn(r->cRemainingToken, 0);
1186 --r->cRemainingToken;
1187 return *(r->pToken++);
1188}
1189
1190
1191DECLINLINE(uint64_t) dxbcTokenReaderRead64(DXBCTokenReader *r)
1192{
1193 uint64_t const u64Low = dxbcTokenReaderRead32(r);
1194 uint64_t const u64High = dxbcTokenReaderRead32(r);
1195 return u64Low + (u64High << 32);
1196}
1197
1198
1199/*
1200 *
1201 * Byte writer.
1202 *
1203 */
1204
/** Growable output buffer with a movable write position. */
typedef struct DXBCByteWriter
{
    /** First byte of the buffer. */
    uint8_t *pu8ByteCodeBegin;
    /** Next byte to be written. */
    uint8_t *pu8ByteCodePtr;
    /** How many bytes allocated in the buffer. */
    uint32_t cbAllocated;
    /** How many bytes remain in the buffer. */
    uint32_t cbRemaining;
    /** Offset of first never written byte.
     * Since the writer allows to jump in the buffer, this field tracks
     * the upper boundary of the written data. */
    uint32_t cbWritten;
    /** Status of the writer; once it is a failure the writer functions in this file bail out early. */
    int32_t  rc;
} DXBCByteWriter;
1217
1218
/** Write position saved by dxbcByteWriterSetOffset for dxbcByteWriterRestore. */
typedef struct DXBCByteWriterState
{
    /** Offset of the next free byte. */
    uint32_t off;
} DXBCByteWriterState;
1223
1224
1225DECLINLINE(void *) dxbcByteWriterPtr(DXBCByteWriter *w)
1226{
1227 return w->pu8ByteCodePtr;
1228}
1229
1230
1231DECLINLINE(uint32_t) dxbcByteWriterSize(DXBCByteWriter *w)
1232{
1233 return (uint32_t)(w->pu8ByteCodePtr - w->pu8ByteCodeBegin);
1234}
1235
1236
1237static bool dxbcByteWriterRealloc(DXBCByteWriter *w, uint32_t cbNew)
1238{
1239 void *pvNew = RTMemAllocZ(cbNew);
1240 if (!pvNew)
1241 {
1242 w->rc = VERR_NO_MEMORY;
1243 return false;
1244 }
1245
1246 uint32_t const cbCurrent = dxbcByteWriterSize(w);
1247 memcpy(pvNew, w->pu8ByteCodeBegin, cbCurrent);
1248 RTMemFree(w->pu8ByteCodeBegin);
1249
1250 w->pu8ByteCodeBegin = (uint8_t *)pvNew;
1251 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + cbCurrent;
1252 w->cbAllocated = cbNew;
1253 w->cbRemaining = cbNew - cbCurrent;
1254 return true;
1255}
1256
1257
1258DECLINLINE(bool) dxbcByteWriterSetOffset(DXBCByteWriter *w, uint32_t off, DXBCByteWriterState *pSavedWriterState)
1259{
1260 if (RT_FAILURE(w->rc))
1261 return false;
1262
1263 uint32_t const cbNew = RT_ALIGN_32(off, 1024);
1264 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1265 AssertReturnStmt(off < cbMax && cbNew < cbMax, w->rc = VERR_INVALID_PARAMETER, false);
1266
1267 if (cbNew > w->cbAllocated)
1268 {
1269 if (!dxbcByteWriterRealloc(w, cbNew))
1270 return false;
1271 }
1272
1273 pSavedWriterState->off = dxbcByteWriterSize(w);
1274
1275 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + off;
1276 w->cbRemaining = w->cbAllocated - off;
1277 return true;
1278}
1279
1280
1281DECLINLINE(void) dxbcByteWriterRestore(DXBCByteWriter *w, DXBCByteWriterState *pSavedWriterState)
1282{
1283 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + pSavedWriterState->off;
1284 w->cbRemaining = w->cbAllocated - pSavedWriterState->off;
1285}
1286
1287
1288DECLINLINE(void) dxbcByteWriterCommit(DXBCByteWriter *w, uint32_t cbCommit)
1289{
1290 if (RT_FAILURE(w->rc))
1291 return;
1292
1293 Assert(cbCommit < w->cbRemaining);
1294 cbCommit = RT_MIN(cbCommit, w->cbRemaining);
1295 w->pu8ByteCodePtr += cbCommit;
1296 w->cbRemaining -= cbCommit;
1297 w->cbWritten = RT_MAX(w->cbWritten, w->cbAllocated - w->cbRemaining);
1298}
1299
1300
1301DECLINLINE(bool) dxbcByteWriterCanWrite(DXBCByteWriter *w, uint32_t cbMore)
1302{
1303 if (RT_FAILURE(w->rc))
1304 return false;
1305
1306 if (cbMore <= w->cbRemaining)
1307 return true;
1308
1309 /* Do not allow to allocate more than 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES */
1310 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1311 AssertReturnStmt(cbMore < cbMax && RT_ALIGN_32(cbMore, 4096) <= cbMax - w->cbAllocated, w->rc = VERR_INVALID_PARAMETER, false);
1312
1313 uint32_t cbNew = w->cbAllocated + RT_ALIGN_32(cbMore, 4096);
1314 return dxbcByteWriterRealloc(w, cbNew);
1315}
1316
1317
1318DECLINLINE(bool) dxbcByteWriterAddTokens(DXBCByteWriter *w, uint32_t const *paToken, uint32_t cToken)
1319{
1320 uint32_t const cbWrite = cToken * sizeof(uint32_t);
1321 if (dxbcByteWriterCanWrite(w, cbWrite))
1322 {
1323 memcpy(dxbcByteWriterPtr(w), paToken, cbWrite);
1324 dxbcByteWriterCommit(w, cbWrite);
1325 return true;
1326 }
1327
1328 AssertFailed();
1329 return false;
1330}
1331
1332
1333DECLINLINE(bool) dxbcByteWriterInit(DXBCByteWriter *w, uint32_t cbInitial)
1334{
1335 RT_ZERO(*w);
1336 return dxbcByteWriterCanWrite(w, cbInitial);
1337}
1338
1339
1340DECLINLINE(void) dxbcByteWriterReset(DXBCByteWriter *w)
1341{
1342 RTMemFree(w->pu8ByteCodeBegin);
1343 RT_ZERO(*w);
1344}
1345
1346
1347DECLINLINE(void) dxbcByteWriterFetchData(DXBCByteWriter *w, void **ppv, uint32_t *pcb)
1348{
1349 *ppv = w->pu8ByteCodeBegin;
1350 *pcb = w->cbWritten;
1351
1352 w->pu8ByteCodeBegin = NULL;
1353 dxbcByteWriterReset(w);
1354}
1355
1356
1357/*
1358 *
1359 * VGPU10 shader parser.
1360 *
1361 */
1362
1363/* Parse an instruction operand. */
1364static int dxbcParseOperand(DXBCTokenReader *r, VGPUOperand *paOperand, uint32_t *pcOperandRemain)
1365{
1366 ASSERT_GUEST_RETURN(*pcOperandRemain > 0, VERR_NOT_SUPPORTED);
1367
1368 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1369
1370 paOperand->paOperandToken = dxbcTokenReaderPtr(r);
1371 paOperand->cOperandToken = 0;
1372
1373 VGPU10OperandToken0 operand0;
1374 operand0.value = dxbcTokenReaderRead32(r);
1375
1376 Log6((" %s(%d) %s(%d) %s(%d) %s(%d)\n",
1377 dxbcOperandNumComponentsToString(operand0.numComponents), operand0.numComponents,
1378 dxbcOperandComponentModeToString(operand0.selectionMode), operand0.selectionMode,
1379 dxbcOperandTypeToString(operand0.operandType), operand0.operandType,
1380 dxbcOperandIndexDimensionToString(operand0.indexDimension), operand0.indexDimension));
1381
1382 ASSERT_GUEST_RETURN(operand0.numComponents <= VGPU10_OPERAND_4_COMPONENT, VERR_INVALID_PARAMETER);
1383 if ( operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32
1384 && operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE64)
1385 {
1386 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1387 {
1388 ASSERT_GUEST_RETURN(operand0.selectionMode <= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE, VERR_INVALID_PARAMETER);
1389 switch (operand0.selectionMode)
1390 {
1391 case VGPU10_OPERAND_4_COMPONENT_MASK_MODE:
1392 Log6((" Mask %#x\n", operand0.mask));
1393 break;
1394 case VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE:
1395 Log6((" Swizzle %s(%d) %s(%d) %s(%d) %s(%d)\n",
1396 dxbcOperandComponentNameToString(operand0.swizzleX), operand0.swizzleX,
1397 dxbcOperandComponentNameToString(operand0.swizzleY), operand0.swizzleY,
1398 dxbcOperandComponentNameToString(operand0.swizzleZ), operand0.swizzleZ,
1399 dxbcOperandComponentNameToString(operand0.swizzleW), operand0.swizzleW));
1400 break;
1401 case VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE:
1402 Log6((" Select %s(%d)\n",
1403 dxbcOperandComponentNameToString(operand0.selectMask), operand0.selectMask));
1404 break;
1405 default: /* Never happens. */
1406 break;
1407 }
1408 }
1409 }
1410
1411 if (operand0.extended)
1412 {
1413 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1414
1415 VGPU10OperandToken1 operand1;
1416 operand1.value = dxbcTokenReaderRead32(r);
1417 }
1418
1419 ASSERT_GUEST_RETURN(operand0.operandType < VGPU10_NUM_OPERANDS, VERR_INVALID_PARAMETER);
1420
1421 if ( operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32
1422 || operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE64)
1423 {
1424 uint32_t cComponent = 0;
1425 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1426 cComponent = 4;
1427 else if (operand0.numComponents == VGPU10_OPERAND_1_COMPONENT)
1428 cComponent = 1;
1429
1430 for (uint32_t i = 0; i < cComponent; ++i)
1431 {
1432 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1433 paOperand->aImm[i] = dxbcTokenReaderRead32(r);
1434 }
1435 }
1436
1437 paOperand->numComponents = operand0.numComponents;
1438 paOperand->selectionMode = operand0.selectionMode;
1439 paOperand->mask = operand0.mask;
1440 paOperand->operandType = operand0.operandType;
1441 paOperand->indexDimension = operand0.indexDimension;
1442
1443 int rc = VINF_SUCCESS;
1444 /* 'indexDimension' tells the number of indices. 'i' is the array index, i.e. i = 0 for 1D, etc. */
1445 for (uint32_t i = 0; i < operand0.indexDimension; ++i)
1446 {
1447 if (i == 0) /* VGPU10_OPERAND_INDEX_1D */
1448 paOperand->aOperandIndex[i].indexRepresentation = operand0.index0Representation;
1449 else if (i == 1) /* VGPU10_OPERAND_INDEX_2D */
1450 paOperand->aOperandIndex[i].indexRepresentation = operand0.index1Representation;
1451 else /* VGPU10_OPERAND_INDEX_3D */
1452 continue; /* Skip because it is "rarely if ever used" and is not supported by VGPU10. */
1453
1454 uint32_t const indexRepresentation = paOperand->aOperandIndex[i].indexRepresentation;
1455 switch (indexRepresentation)
1456 {
1457 case VGPU10_OPERAND_INDEX_IMMEDIATE32:
1458 {
1459 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1460 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1461 break;
1462 }
1463 case VGPU10_OPERAND_INDEX_IMMEDIATE64:
1464 {
1465 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1466 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1467 break;
1468 }
1469 case VGPU10_OPERAND_INDEX_RELATIVE:
1470 {
1471 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1472 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1473 Log6((" [operand index %d] parsing relative\n", i));
1474 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1475 break;
1476 }
1477 case VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1478 {
1479 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1480 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1481 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1482 Log6((" [operand index %d] parsing relative\n", i));
1483 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1484 break;
1485 }
1486 case VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE:
1487 {
1488 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1489 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1490 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1491 Log6((" [operand index %d] parsing relative\n", i));
1492 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1493 break;
1494 }
1495 default:
1496 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1497 }
1498 Log6((" [operand index %d] %s(%d): %#llx%s\n",
1499 i, dxbcOperandIndexRepresentationToString(indexRepresentation), indexRepresentation,
1500 paOperand->aOperandIndex[i].iOperandImmediate, paOperand->aOperandIndex[i].pOperandRelative ? " + relative" : ""));
1501 if (RT_FAILURE(rc))
1502 break;
1503 }
1504
1505 paOperand->cOperandToken = dxbcTokenReaderPtr(r) - paOperand->paOperandToken;
1506
1507 *pcOperandRemain -= 1;
1508 return VINF_SUCCESS;
1509}
1510
1511
1512/* Parse an instruction. */
1513static int dxbcParseOpcode(DXBCTokenReader *r, VGPUOpcode *pOpcode)
1514{
1515 RT_ZERO(*pOpcode);
1516 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1517
1518 pOpcode->paOpcodeToken = dxbcTokenReaderPtr(r);
1519
1520 VGPU10OpcodeToken0 opcode;
1521 opcode.value = dxbcTokenReaderRead32(r);
1522
1523 pOpcode->opcodeType = opcode.opcodeType;
1524 ASSERT_GUEST_RETURN(pOpcode->opcodeType < VGPU10_NUM_OPCODES, VERR_INVALID_PARAMETER);
1525
1526 Log6(("[%#x] %s length %d\n",
1527 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), opcode.instructionLength));
1528
1529 uint32_t const cOperand = g_aOpcodeInfo[pOpcode->opcodeType].cOperand;
1530 if (cOperand != UINT32_MAX)
1531 {
1532 ASSERT_GUEST_RETURN(cOperand < RT_ELEMENTS(pOpcode->aIdxOperand), VERR_INVALID_PARAMETER);
1533
1534 pOpcode->cOpcodeToken = opcode.instructionLength;
1535 uint32_t cOpcode = 1; /* Opcode token + extended opcode tokens. */
1536 if (opcode.extended)
1537 {
1538 if ( pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_BODY
1539 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_TABLE
1540 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_INTERFACE
1541 || pOpcode->opcodeType == VGPU10_OPCODE_INTERFACE_CALL
1542 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_THREAD_GROUP)
1543 {
1544 /* "next DWORD contains ... the actual instruction length in DWORD since it may not fit into 7 bits" */
1545 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1546 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1547 ++cOpcode;
1548 }
1549 else
1550 {
1551 VGPU10OpcodeToken1 opcode1;
1552 do
1553 {
1554 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1555 opcode1.value = dxbcTokenReaderRead32(r);
1556 ++cOpcode;
1557 ASSERT_GUEST( opcode1.opcodeType == VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS
1558 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM
1559 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE);
1560 } while(opcode1.extended);
1561 }
1562 }
1563
1564 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken >= 1 && pOpcode->cOpcodeToken < 256, VERR_INVALID_PARAMETER);
1565 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - cOpcode), VERR_INVALID_PARAMETER);
1566
1567#ifdef LOG_ENABLED
1568 Log6((" %08X", opcode.value));
1569 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1570 Log6((" %08X", r->pToken[i - 1]));
1571 Log6(("\n"));
1572
1573 if (pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1574 Log6((" %s\n",
1575 dxbcResourceDimensionToString(opcode.resourceDimension)));
1576 else
1577 Log6((" %s\n",
1578 dxbcInterpolationModeToString(opcode.interpolationMode)));
1579#endif
1580 /* Additional tokens before operands. */
1581 switch (pOpcode->opcodeType)
1582 {
1583 case VGPU10_OPCODE_INTERFACE_CALL:
1584 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1585 dxbcTokenReaderSkip(r, 1); /* Function index */
1586 break;
1587
1588 default:
1589 break;
1590 }
1591
1592 /* Operands. */
1593 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1594 for (uint32_t i = 0; i < cOperand; ++i)
1595 {
1596 Log6((" [operand %d]\n", i));
1597 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1598 pOpcode->aIdxOperand[i] = idxOperand;
1599 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1600 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1601 }
1602
1603 pOpcode->cOperand = cOperand;
1604
1605 /* Additional tokens after operands. */
1606 switch (pOpcode->opcodeType)
1607 {
1608 case VGPU10_OPCODE_DCL_INPUT_SIV:
1609 case VGPU10_OPCODE_DCL_INPUT_SGV:
1610 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
1611 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
1612 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1613 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1614 {
1615 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1616
1617 VGPU10NameToken name;
1618 name.value = dxbcTokenReaderRead32(r);
1619 Log6((" %s(%d)\n",
1620 dxbcSystemNameToString(name.name), name.name));
1621 pOpcode->semanticName = name.name;
1622 break;
1623 }
1624 case VGPU10_OPCODE_DCL_RESOURCE:
1625 {
1626 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1627 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1628 break;
1629 }
1630 case VGPU10_OPCODE_DCL_TEMPS:
1631 {
1632 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1633 dxbcTokenReaderSkip(r, 1); /* number of temps */
1634 break;
1635 }
1636 case VGPU10_OPCODE_DCL_INDEXABLE_TEMP:
1637 {
1638 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1639 dxbcTokenReaderSkip(r, 3); /* register index; number of registers; number of components */
1640 break;
1641 }
1642 case VGPU10_OPCODE_DCL_INDEX_RANGE:
1643 {
1644 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1645 dxbcTokenReaderSkip(r, 1); /* count of registers */
1646 break;
1647 }
1648 case VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1649 {
1650 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1651 dxbcTokenReaderSkip(r, 1); /* maximum number of primitives */
1652 break;
1653 }
1654 case VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT:
1655 {
1656 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1657 dxbcTokenReaderSkip(r, 1); /* number of instances */
1658 break;
1659 }
1660 case VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR:
1661 {
1662 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1663 dxbcTokenReaderSkip(r, 1); /* maximum TessFactor */
1664 break;
1665 }
1666 case VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1667 case VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1668 {
1669 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1670 dxbcTokenReaderSkip(r, 1); /* number of instances of the current fork/join phase program to execute */
1671 break;
1672 }
1673 case VGPU10_OPCODE_DCL_THREAD_GROUP:
1674 {
1675 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1676 dxbcTokenReaderSkip(r, 3); /* Thread Group dimensions as UINT32: x, y, z */
1677 break;
1678 }
1679 case VGPU10_OPCODE_DCL_UAV_TYPED:
1680 {
1681 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1682 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1683 break;
1684 }
1685 case VGPU10_OPCODE_DCL_UAV_STRUCTURED:
1686 {
1687 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1688 dxbcTokenReaderSkip(r, 1); /* byte stride */
1689 break;
1690 }
1691 case VGPU10_OPCODE_DCL_TGSM_RAW:
1692 {
1693 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1694 dxbcTokenReaderSkip(r, 1); /* element count */
1695 break;
1696 }
1697 case VGPU10_OPCODE_DCL_TGSM_STRUCTURED:
1698 {
1699 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1700 dxbcTokenReaderSkip(r, 2); /* struct byte stride; struct count */
1701 break;
1702 }
1703 case VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED:
1704 {
1705 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1706 dxbcTokenReaderSkip(r, 1); /* struct byte stride */
1707 break;
1708 }
1709 default:
1710 break;
1711 }
1712 }
1713 else
1714 {
1715 /* Special opcodes. */
1716 if (pOpcode->opcodeType == VGPU10_OPCODE_CUSTOMDATA)
1717 {
1718 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1719 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1720
1721 if (pOpcode->cOpcodeToken < 2)
1722 pOpcode->cOpcodeToken = 2;
1723 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 2), VERR_INVALID_PARAMETER);
1724
1725#ifdef LOG_ENABLED
1726 Log6((" %08X", opcode.value));
1727 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1728 Log6((" %08X", r->pToken[i - 1]));
1729 Log6(("\n"));
1730
1731 Log6((" %s\n",
1732 dxbcCustomDataClassToString(opcode.customDataClass)));
1733#endif
1734 dxbcTokenReaderSkip(r, pOpcode->cOpcodeToken - 2);
1735 }
1736 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1737 {
1738 pOpcode->cOpcodeToken = opcode.instructionLength;
1739 pOpcode->opcodeSubtype = opcode.vmwareOpcodeType;
1740
1741#ifdef LOG_ENABLED
1742 Log6((" %08X", opcode.value));
1743 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1744 Log6((" %08X", r->pToken[i - 1]));
1745 Log6(("\n"));
1746
1747 Log6((" %s(%d)\n",
1748 dxbcVmwareOpcodeTypeToString(opcode.vmwareOpcodeType), opcode.vmwareOpcodeType));
1749#endif
1750
1751 if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_IDIV)
1752 {
1753 /* Integer divide. */
1754 pOpcode->cOperand = 4; /* dstQuit, dstRem, src0, src1. */
1755
1756 /* Operands. */
1757 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1758 for (uint32_t i = 0; i < pOpcode->cOperand; ++i)
1759 {
1760 Log6((" [operand %d]\n", i));
1761 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1762 pOpcode->aIdxOperand[i] = idxOperand;
1763 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1764 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1765 }
1766 }
1767 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DFRC)
1768 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DRSQ)
1769 else
1770 {
1771 /** @todo implement */
1772 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1773 }
1774 }
1775 else
1776 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1777
1778 // pOpcode->cOperand = 0;
1779 }
1780
1781 return VINF_SUCCESS;
1782}
1783
1784
1785typedef struct DXBCOUTPUTCTX
1786{
1787 VGPU10ProgramToken programToken;
1788 uint32_t cToken; /* Number of tokens in the original shader code. */
1789
1790 uint32_t offSubroutine; /* Current offset where to write subroutines. */
1791} DXBCOUTPUTCTX;
1792
1793
1794static void dxbcOutputInit(DXBCOUTPUTCTX *pOutctx, VGPU10ProgramToken const *pProgramToken, uint32_t cToken)
1795{
1796 RT_ZERO(*pOutctx);
1797 pOutctx->programToken = *pProgramToken;
1798 pOutctx->cToken = cToken;
1799
1800 pOutctx->offSubroutine = cToken * 4;
1801}
1802
1803
1804static int dxbcEmitVmwareIDIV(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1805{
1806 /* Insert a call and append a subroutne. */
1807 VGPU10OpcodeToken0 opcode;
1808 VGPU10OperandToken0 operand;
1809
1810 uint32_t const label = (pOutctx->offSubroutine - dxbcByteWriterSize(w)) / 4;
1811
1812 /*
1813 * Call
1814 */
1815 opcode.value = 0;
1816 opcode.opcodeType = VGPU10_OPCODE_CALL;
1817 opcode.instructionLength = 3;
1818 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1819
1820 operand.value = 0;
1821 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1822 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1823 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1824 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1825 dxbcByteWriterAddTokens(w, &operand.value, 1);
1826
1827 dxbcByteWriterAddTokens(w, &label, 1);
1828
1829 opcode.value = 0;
1830 opcode.opcodeType = VGPU10_OPCODE_NOP;
1831 opcode.instructionLength = 1;
1832 for (unsigned i = 0; i < pOpcode->cOpcodeToken - 3; ++i)
1833 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1834
1835 /*
1836 * Subroutine.
1837 */
1838 DXBCByteWriterState savedWriterState;
1839 if (!dxbcByteWriterSetOffset(w, pOutctx->offSubroutine, &savedWriterState))
1840 return w->rc;
1841
1842 /* label */
1843 opcode.value = 0;
1844 opcode.opcodeType = VGPU10_OPCODE_LABEL;
1845 opcode.instructionLength = 3;
1846 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1847
1848 operand.value = 0;
1849 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1850 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1851 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1852 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1853 dxbcByteWriterAddTokens(w, &operand.value, 1);
1854 dxbcByteWriterAddTokens(w, &label, 1);
1855
1856 /* Just output UDIV for now. */
1857 opcode.value = 0;
1858 opcode.opcodeType = VGPU10_OPCODE_UDIV;
1859 opcode.instructionLength = pOpcode->cOpcodeToken;
1860 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1861 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], pOpcode->cOpcodeToken - 1);
1862
1863 /* ret */
1864 opcode.value = 0;
1865 opcode.opcodeType = VGPU10_OPCODE_RET;
1866 opcode.instructionLength = 1;
1867 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1868
1869 pOutctx->offSubroutine = dxbcByteWriterSize(w);
1870 dxbcByteWriterRestore(w, &savedWriterState);
1871
1872 return w->rc;
1873}
1874
1875
1876static int dxbcOutputOpcode(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1877{
1878#ifdef DEBUG
1879 void *pvBegin = dxbcByteWriterPtr(w);
1880#endif
1881
1882 if ( pOutctx->programToken.programType == VGPU10_PIXEL_SHADER
1883 && pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1884 {
1885 /** @todo This is a workaround. */
1886 /* Sometimes the guest (Mesa) created a shader with uninitialized resource dimension.
1887 * Use texture 2d because it is what a pixel shader normally uses.
1888 */
1889 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken == 4, VERR_INVALID_PARAMETER);
1890
1891 VGPU10OpcodeToken0 opcode;
1892 opcode.value = pOpcode->paOpcodeToken[0];
1893 if (opcode.resourceDimension == VGPU10_RESOURCE_DIMENSION_BUFFER)
1894 {
1895 opcode.resourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
1896 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1897 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], 2);
1898 uint32_t const returnType = 0x5555; /* float */
1899 dxbcByteWriterAddTokens(w, &returnType, 1);
1900 return VINF_SUCCESS;
1901 }
1902 }
1903 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1904 {
1905 if (pOpcode->opcodeSubtype == VGPU10_VMWARE_OPCODE_IDIV)
1906 {
1907 return dxbcEmitVmwareIDIV(pOutctx, w, pOpcode);
1908 }
1909
1910 ASSERT_GUEST_FAILED_RETURN(VERR_NOT_SUPPORTED);
1911 }
1912
1913#ifdef DEBUG
1914 /* The code above must emit either nothing or everything. */
1915 Assert((uintptr_t)pvBegin == (uintptr_t)dxbcByteWriterPtr(w));
1916#endif
1917
1918 /* Just emit the unmodified instruction. */
1919 dxbcByteWriterAddTokens(w, pOpcode->paOpcodeToken, pOpcode->cOpcodeToken);
1920 return VINF_SUCCESS;
1921}
1922
1923
1924static int dxbcOutputFinalize(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w)
1925{
1926 RT_NOREF(pOutctx, w);
1927 return VINF_SUCCESS;
1928}
1929
1930
1931static DECLCALLBACK(int) signatureEntryCmp(void const *pvElement1, void const *pvElement2, void *pvUser)
1932{
1933 SVGA3dDXSignatureEntry const *e1 = (SVGA3dDXSignatureEntry *)pvElement1;
1934 SVGA3dDXSignatureEntry const *e2 = (SVGA3dDXSignatureEntry *)pvElement2;
1935 RT_NOREF(pvUser);
1936
1937 if (e1->registerIndex < e2->registerIndex)
1938 return -1;
1939 if (e1->registerIndex > e2->registerIndex)
1940 return 1;
1941 if ((e1->mask & 0xf) < (e2->mask & 0xf))
1942 return -1;
1943 if ((e1->mask & 0xf) > (e2->mask & 0xf))
1944 return 1;
1945 return 0;
1946}
1947
1948
1949static void dxbcGenerateSemantics(DXShaderInfo *pInfo, uint32_t cSignature,
1950 SVGA3dDXSignatureEntry const *paSignature,
1951 DXShaderAttributeSemantic *paSemantic,
1952 uint32_t u32BlobType);
1953
1954
1955/*
1956 * Parse and verify the shader byte code. Extract input and output signatures into pInfo.
1957 */
1958int DXShaderParse(void const *pvShaderCode, uint32_t cbShaderCode, DXShaderInfo *pInfo)
1959{
1960 if (pInfo)
1961 RT_ZERO(*pInfo);
1962
1963 ASSERT_GUEST_RETURN(cbShaderCode <= SVGA3D_MAX_SHADER_MEMORY_BYTES, VERR_INVALID_PARAMETER);
1964 ASSERT_GUEST_RETURN((cbShaderCode & 0x3) == 0, VERR_INVALID_PARAMETER); /* Aligned to the token size. */
1965 ASSERT_GUEST_RETURN(cbShaderCode >= 8, VERR_INVALID_PARAMETER); /* At least program and length tokens. */
1966
1967 uint32_t const *paToken = (uint32_t *)pvShaderCode;
1968
1969 VGPU10ProgramToken const *pProgramToken = (VGPU10ProgramToken *)&paToken[0];
1970 ASSERT_GUEST_RETURN( pProgramToken->majorVersion >= 4
1971 && pProgramToken->programType <= VGPU10_COMPUTE_SHADER, VERR_INVALID_PARAMETER);
1972 if (pInfo)
1973 pInfo->enmProgramType = (VGPU10_PROGRAM_TYPE)pProgramToken->programType;
1974
1975 uint32_t const cToken = paToken[1];
1976 Log6(("Shader version %d.%d type %s(%d) Length %d\n",
1977 pProgramToken->majorVersion, pProgramToken->minorVersion, dxbcShaderTypeToString(pProgramToken->programType), pProgramToken->programType, cToken));
1978 ASSERT_GUEST_RETURN(cbShaderCode / 4 >= cToken, VERR_INVALID_PARAMETER); /* Declared length should be less or equal to the actual. */
1979
1980 /* Write the parsed (and possibly modified) shader to a memory buffer. */
1981 DXBCByteWriter dxbcByteWriter;
1982 DXBCByteWriter *w = &dxbcByteWriter;
1983 if (!dxbcByteWriterInit(w, 4096 + cbShaderCode))
1984 return VERR_NO_MEMORY;
1985
1986 dxbcByteWriterAddTokens(w, paToken, 2);
1987
1988 DXBCTokenReader parser;
1989 RT_ZERO(parser);
1990
1991 DXBCTokenReader *r = &parser;
1992 r->pToken = &paToken[2];
1993 r->cToken = r->cRemainingToken = cToken - 2;
1994
1995 DXBCOUTPUTCTX outctx;
1996 dxbcOutputInit(&outctx, pProgramToken, cToken);
1997
1998 int rc = VINF_SUCCESS;
1999 while (dxbcTokenReaderCanRead(r, 1))
2000 {
2001 uint32_t const offOpcode = dxbcByteWriterSize(w);
2002
2003 VGPUOpcode opcode;
2004 rc = dxbcParseOpcode(r, &opcode);
2005 ASSERT_GUEST_STMT_BREAK(RT_SUCCESS(rc), rc = VERR_INVALID_PARAMETER);
2006
2007 rc = dxbcOutputOpcode(&outctx, w, &opcode);
2008 AssertRCBreak(rc);
2009
2010 if (pInfo)
2011 {
2012 /* Remember offsets of DCL_RESOURCE instructions. */
2013 if ( outctx.programToken.programType == VGPU10_PIXEL_SHADER
2014 && opcode.opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
2015 {
2016 if ( opcode.cOperand == 1
2017 && opcode.aValOperand[0].indexDimension == VGPU10_OPERAND_INDEX_1D
2018 && opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32)
2019 {
2020 uint32_t const indexResource = opcode.aValOperand[0].aOperandIndex[0].iOperandImmediate;
2021 if (indexResource < SVGA3D_DX_MAX_SRVIEWS)
2022 {
2023 ASSERT_GUEST(pInfo->aOffDclResource[indexResource] == 0);
2024 pInfo->aOffDclResource[indexResource] = offOpcode;
2025 pInfo->cDclResource = RT_MAX(pInfo->cDclResource, indexResource + 1);
2026 }
2027 else
2028 ASSERT_GUEST_FAILED();
2029 }
2030 else
2031 ASSERT_GUEST_FAILED();
2032 }
2033
2034 /* Fetch signatures. */
2035 SVGA3dDXSignatureEntry *pSignatureEntry = NULL;
2036 switch (opcode.opcodeType)
2037 {
2038 case VGPU10_OPCODE_DCL_INPUT:
2039 case VGPU10_OPCODE_DCL_INPUT_SIV:
2040 //case VGPU10_OPCODE_DCL_INPUT_SGV:
2041 case VGPU10_OPCODE_DCL_INPUT_PS:
2042 //case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
2043 //case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
2044 //case VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
2045 ASSERT_GUEST_STMT_BREAK(pInfo->cInputSignature < RT_ELEMENTS(pInfo->aInputSignature), rc = VERR_INVALID_PARAMETER);
2046 pSignatureEntry = &pInfo->aInputSignature[pInfo->cInputSignature++];
2047 break;
2048 case VGPU10_OPCODE_DCL_OUTPUT:
2049 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
2050 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
2051 //case VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
2052 ASSERT_GUEST_STMT_BREAK(pInfo->cOutputSignature < RT_ELEMENTS(pInfo->aOutputSignature), rc = VERR_INVALID_PARAMETER);
2053 pSignatureEntry = &pInfo->aOutputSignature[pInfo->cOutputSignature++];
2054 break;
2055 default:
2056 break;
2057 }
2058
2059 if (RT_FAILURE(rc))
2060 break;
2061
2062 if (pSignatureEntry)
2063 {
2064 ASSERT_GUEST_STMT_BREAK( opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32
2065 || opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE64,
2066 rc = VERR_NOT_SUPPORTED);
2067
2068 uint32_t const indexDimension = opcode.aValOperand[0].indexDimension;
2069 if (indexDimension == VGPU10_OPERAND_INDEX_0D)
2070 {
2071 if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID)
2072 {
2073 pSignatureEntry->registerIndex = 0;
2074 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID;
2075 }
2076 else if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH)
2077 {
2078 /* oDepth is always last in the signature. Register index is equal to 0xFFFFFFFF. */
2079 pSignatureEntry->registerIndex = 0xFFFFFFFF;
2080 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
2081 }
2082 else if (opcode.aValOperand[0].operandType <= VGPU10_OPERAND_TYPE_SM50_MAX)
2083 {
2084 pSignatureEntry->registerIndex = 0;
2085 pSignatureEntry->semanticName = opcode.semanticName;
2086 }
2087 else
2088 ASSERT_GUEST_FAILED_STMT_BREAK(rc = VERR_NOT_SUPPORTED);
2089 }
2090 else
2091 {
2092 ASSERT_GUEST_STMT_BREAK( indexDimension == VGPU10_OPERAND_INDEX_1D
2093 || indexDimension == VGPU10_OPERAND_INDEX_2D
2094 || indexDimension == VGPU10_OPERAND_INDEX_3D,
2095 rc = VERR_NOT_SUPPORTED);
2096 /* The register index seems to be in the highest dimension. */
2097 pSignatureEntry->registerIndex = opcode.aValOperand[0].aOperandIndex[indexDimension - VGPU10_OPERAND_INDEX_1D].iOperandImmediate;
2098 pSignatureEntry->semanticName = opcode.semanticName;
2099 }
2100 pSignatureEntry->mask = opcode.aValOperand[0].mask;
2101 pSignatureEntry->componentType = SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN; // Will be updated by vboxDXUpdateVSInputSignature
2102 pSignatureEntry->minPrecision = SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT;
2103 }
2104 }
2105 }
2106
2107 if (RT_FAILURE(rc))
2108 {
2109 return rc;
2110 }
2111
2112 rc = dxbcOutputFinalize(&outctx, w);
2113 if (RT_FAILURE(rc))
2114 {
2115 return rc;
2116 }
2117
2118 dxbcByteWriterFetchData(w, &pInfo->pvBytecode, &pInfo->cbBytecode);
2119 uint32_t *pcOutputToken = (uint32_t *)pInfo->pvBytecode + 1;
2120 *pcOutputToken = pInfo->cbBytecode / 4;
2121
2122 /* Sort signatures by register index and mask because the host API need them to be sorted. */
2123 if (pInfo->cInputSignature)
2124 {
2125 RTSortShell(pInfo->aInputSignature, pInfo->cInputSignature, sizeof(pInfo->aInputSignature[0]),
2126 signatureEntryCmp, NULL);
2127 dxbcGenerateSemantics(pInfo, pInfo->cInputSignature,
2128 pInfo->aInputSignature,
2129 pInfo->aInputSemantic, DXBC_BLOB_TYPE_ISGN);
2130 }
2131 if (pInfo->cOutputSignature)
2132 {
2133 RTSortShell(pInfo->aOutputSignature, pInfo->cOutputSignature, sizeof(pInfo->aOutputSignature[0]),
2134 signatureEntryCmp, NULL);
2135 dxbcGenerateSemantics(pInfo, pInfo->cOutputSignature,
2136 pInfo->aOutputSignature,
2137 pInfo->aOutputSemantic, DXBC_BLOB_TYPE_OSGN);
2138 }
2139 if (pInfo->cPatchConstantSignature)
2140 {
2141 RTSortShell(pInfo->aPatchConstantSignature, pInfo->cPatchConstantSignature, sizeof(pInfo->aPatchConstantSignature[0]),
2142 signatureEntryCmp, NULL);
2143 dxbcGenerateSemantics(pInfo, pInfo->cPatchConstantSignature,
2144 pInfo->aPatchConstantSignature,
2145 pInfo->aPatchConstantSemantic, DXBC_BLOB_TYPE_PCSG);
2146 }
2147
2148#ifdef LOG_ENABLED
2149 if (pInfo->cInputSignature)
2150 {
2151 Log6(("Input signatures:\n"));
2152 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2153 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2154 i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask,
2155 pInfo->aInputSemantic[i].pcszSemanticName, pInfo->aInputSemantic[i].SemanticIndex));
2156 }
2157 if (pInfo->cOutputSignature)
2158 {
2159 Log6(("Output signatures:\n"));
2160 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2161 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2162 i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask,
2163 pInfo->aOutputSemantic[i].pcszSemanticName, pInfo->aOutputSemantic[i].SemanticIndex));
2164 }
2165 if (pInfo->cPatchConstantSignature)
2166 {
2167 Log6(("Patch constant signatures:\n"));
2168 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2169 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2170 i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask,
2171 pInfo->aPatchConstantSemantic[i].pcszSemanticName, pInfo->aPatchConstantSemantic[i].SemanticIndex));
2172 }
2173#endif
2174
2175 return VINF_SUCCESS;
2176}
2177
2178void DXShaderGenerateSemantics(DXShaderInfo *pInfo)
2179{
2180 if (pInfo->cInputSignature)
2181 dxbcGenerateSemantics(pInfo, pInfo->cInputSignature,
2182 pInfo->aInputSignature,
2183 pInfo->aInputSemantic, DXBC_BLOB_TYPE_ISGN);
2184 if (pInfo->cOutputSignature)
2185 dxbcGenerateSemantics(pInfo, pInfo->cOutputSignature,
2186 pInfo->aOutputSignature,
2187 pInfo->aOutputSemantic, DXBC_BLOB_TYPE_OSGN);
2188 if (pInfo->cPatchConstantSignature)
2189 dxbcGenerateSemantics(pInfo, pInfo->cPatchConstantSignature,
2190 pInfo->aPatchConstantSignature,
2191 pInfo->aPatchConstantSemantic, DXBC_BLOB_TYPE_PCSG);
2192}
2193
2194void DXShaderFree(DXShaderInfo *pInfo)
2195{
2196 RTMemFree(pInfo->pvBytecode);
2197 RT_ZERO(*pInfo);
2198}
2199
2200
#if 0 // Unused. Replaced with dxbcSemanticInfo.
/**
 * Translates a system value semantic to its HLSL name.
 *
 * @returns The semantic name; "ATTRIB" for anything which is not a known system value.
 * @param   enmSemanticName     The semantic.
 */
static char const *dxbcSemanticName(SVGA3dDXSignatureSemanticName enmSemanticName)
{
    /* Generic. Arbitrary name. It does not have any meaning. */
    char const *pszName = "ATTRIB";

    /* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics */
    switch (enmSemanticName)
    {
        case SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION:                          pszName = "SV_Position"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE:                     pszName = "SV_ClipDistance"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE:                     pszName = "SV_CullDistance"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX:         pszName = "SV_RenderTargetArrayIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX:              pszName = "SV_ViewportArrayIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID:                         pszName = "SV_VertexID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID:                      pszName = "SV_PrimitiveID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID:                       pszName = "SV_InstanceID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE:                     pszName = "SV_IsFrontFace"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX:                      pszName = "SV_SampleIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: pszName = "SV_FinalQuadUeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: pszName = "SV_FinalQuadVeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: pszName = "SV_FinalQuadUeq1EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: pszName = "SV_FinalQuadVeq1EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:    pszName = "SV_FinalQuadUInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:    pszName = "SV_FinalQuadVInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriUeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriVeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriWeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR:       pszName = "SV_FinalTriInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR:      pszName = "SV_FinalLineDetailTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR:     pszName = "SV_FinalLineDensityTessFactor"; break;
        default:
            /* Only the undefined semantic is expected to end up with the generic name. */
            Assert(enmSemanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
            break;
    }

    return pszName;
}
#endif
2237
2238
/**
 * Description of a signature semantic.
 *
 * See https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics
 */
typedef struct VGPUSemanticInfo
{
    /** The semantic name. */
    char const *pszName;
    /** The type: 0 - undefined, 1 - unsigned int, 2 - signed int, 3 - float. */
    uint32_t u32Type;
} VGPUSemanticInfo;
2251
2252static VGPUSemanticInfo const g_aSemanticInfo[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX] =
2253{
2254 { "ATTRIB", 0 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2255 { "SV_Position", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1
2256 { "SV_ClipDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2
2257 { "SV_CullDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3
2258 { "SV_RenderTargetArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4
2259 { "SV_ViewportArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5
2260 { "SV_VertexID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6
2261 { "SV_PrimitiveID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7
2262 { "SV_InstanceID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8
2263 { "SV_IsFrontFace", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9
2264 { "SV_SampleIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10
2265 /** @todo Is this a correct name for all TessFactors? */
2266 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11
2267 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12
2268 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13
2269 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14
2270 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15
2271 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16
2272 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17
2273 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18
2274 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19
2275 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20
2276 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21
2277 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22
2278};
2279
2280static VGPUSemanticInfo const g_SemanticPSOutput =
2281 { "SV_TARGET", 3 }; // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2282
2283
2284static VGPUSemanticInfo const *dxbcSemanticInfo(DXShaderInfo const *pInfo, SVGA3dDXSignatureSemanticName enmSemanticName, uint32_t u32BlobType)
2285{
2286 if (enmSemanticName < RT_ELEMENTS(g_aSemanticInfo))
2287 {
2288 if ( enmSemanticName == 0
2289 && pInfo->enmProgramType == VGPU10_PIXEL_SHADER
2290 && u32BlobType == DXBC_BLOB_TYPE_OSGN)
2291 return &g_SemanticPSOutput;
2292 return &g_aSemanticInfo[enmSemanticName];
2293 }
2294 return &g_aSemanticInfo[0];
2295}
2296
2297
2298static void dxbcGenerateSemantics(DXShaderInfo *pInfo, uint32_t cSignature,
2299 SVGA3dDXSignatureEntry const *paSignature,
2300 DXShaderAttributeSemantic *paSemantic,
2301 uint32_t u32BlobType)
2302{
2303 for (uint32_t iSignatureEntry = 0; iSignatureEntry < cSignature; ++iSignatureEntry)
2304 {
2305 SVGA3dDXSignatureEntry const *src = &paSignature[iSignatureEntry];
2306 DXShaderAttributeSemantic *dst = &paSemantic[iSignatureEntry];
2307
2308 ASSERT_GUEST_RETURN_VOID(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX);
2309
2310 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2311 dst->pcszSemanticName = pSemanticInfo->pszName;
2312 dst->SemanticIndex = 0;
2313 for (uint32_t i = 0; i < iSignatureEntry; ++i)
2314 {
2315 DXShaderAttributeSemantic const *pSemantic = &paSemantic[i];
2316 if (RTStrCmp(pSemantic->pcszSemanticName, dst->pcszSemanticName) == 0)
2317 ++dst->SemanticIndex;
2318 }
2319 }
2320}
2321
2322
2323static int dxbcCreateIOSGNBlob(DXShaderInfo const *pInfo, DXBCHeader *pHdr, uint32_t u32BlobType, uint32_t cSignature,
2324 SVGA3dDXSignatureEntry const *paSignature, DXShaderAttributeSemantic const *paSemantic, DXBCByteWriter *w)
2325{
2326 RT_NOREF(pInfo);
2327 AssertReturn(cSignature <= SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS, VERR_INVALID_PARAMETER);
2328
2329 uint32_t cbBlob = RT_UOFFSETOF_DYN(DXBCBlobIOSGN, aElement[cSignature]);
2330 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2331 return VERR_NO_MEMORY;
2332
2333 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2334 pHdrBlob->u32BlobType = u32BlobType;
2335 // pHdrBlob->cbBlob = 0;
2336
2337 DXBCBlobIOSGN *pHdrISGN = (DXBCBlobIOSGN *)&pHdrBlob[1];
2338 pHdrISGN->cElement = cSignature;
2339 pHdrISGN->offElement = RT_UOFFSETOF(DXBCBlobIOSGN, aElement[0]);
2340
2341 for (uint32_t iSignatureEntry = 0; iSignatureEntry < cSignature; ++iSignatureEntry)
2342 {
2343 SVGA3dDXSignatureEntry const *srcEntry = &paSignature[iSignatureEntry];
2344 DXShaderAttributeSemantic const *srcSemantic = &paSemantic[iSignatureEntry];
2345 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iSignatureEntry];
2346
2347 dst->offElementName = 0;
2348 for (uint32_t i = 0; i < iSignatureEntry; ++i)
2349 {
2350 DXBCBlobIOSGNElement const *pElement = &pHdrISGN->aElement[i];
2351 char const *pszElementName = (char *)pHdrISGN + pElement->offElementName;
2352 if (RTStrCmp(pszElementName, srcSemantic->pcszSemanticName) == 0)
2353 {
2354 dst->offElementName = pElement->offElementName;
2355 break;
2356 }
2357 }
2358 dst->idxSemantic = srcSemantic->SemanticIndex;
2359 dst->enmSystemValue = srcEntry->semanticName;
2360 dst->enmComponentType = srcEntry->componentType;
2361 dst->idxRegister = srcEntry->registerIndex;
2362 dst->u.mask = srcEntry->mask;
2363
2364 if (dst->offElementName == 0)
2365 {
2366 /* Store the semantic name for this element. */
2367 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without DXBCBlobHeader). */
2368 uint32_t const cbElementName = (uint32_t)strlen(srcSemantic->pcszSemanticName) + 1;
2369 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob + cbElementName))
2370 return VERR_NO_MEMORY;
2371
2372 memcpy((char *)pHdrISGN + dst->offElementName, srcSemantic->pcszSemanticName, cbElementName);
2373 cbBlob += cbElementName;
2374 }
2375 }
2376
2377 /* Blobs are 4 bytes aligned. Commit the blob data. */
2378 cbBlob = RT_ALIGN_32(cbBlob, 4);
2379 pHdrBlob->cbBlob = cbBlob;
2380 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2381 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2382 return VINF_SUCCESS;
2383}
2384
2385
2386static int dxbcCreateSHDRBlob(DXBCHeader *pHdr, uint32_t u32BlobType,
2387 void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2388{
2389 uint32_t cbBlob = cbShader;
2390 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2391 return VERR_NO_MEMORY;
2392
2393 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2394 pHdrBlob->u32BlobType = u32BlobType;
2395 // pHdrBlob->cbBlob = 0;
2396
2397 memcpy(&pHdrBlob[1], pvShader, cbShader);
2398
2399 /* Blobs are 4 bytes aligned. Commit the blob data. */
2400 cbBlob = RT_ALIGN_32(cbBlob, 4);
2401 pHdrBlob->cbBlob = cbBlob;
2402 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2403 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2404 return VINF_SUCCESS;
2405}
2406
2407
2408/*
2409 * Create a DXBC container with signature and shader code data blobs.
2410 */
2411static int dxbcCreateFromInfo(DXShaderInfo const *pInfo, void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2412{
2413 int rc;
2414
2415 /* Create a DXBC container with ISGN, OSGN and SHDR blobs. */
2416 uint32_t cBlob = 3;
2417 if ( pInfo->enmProgramType == VGPU10_HULL_SHADER
2418 || pInfo->enmProgramType == VGPU10_DOMAIN_SHADER)
2419 ++cBlob;
2420
2421 uint32_t const cbHdr = RT_UOFFSETOF_DYN(DXBCHeader, aBlobOffset[cBlob]); /* Header with blob offsets. */
2422 if (!dxbcByteWriterCanWrite(w, cbHdr))
2423 return VERR_NO_MEMORY;
2424
2425 /* Container header. */
2426 DXBCHeader *pHdr = (DXBCHeader *)dxbcByteWriterPtr(w);
2427 pHdr->u32DXBC = DXBC_MAGIC;
2428 // RT_ZERO(pHdr->au8Hash);
2429 pHdr->u32Version = 1;
2430 pHdr->cbTotal = cbHdr;
2431 pHdr->cBlob = cBlob;
2432 //RT_ZERO(pHdr->aBlobOffset);
2433 dxbcByteWriterCommit(w, cbHdr);
2434
2435#ifdef LOG_ENABLED
2436 if (pInfo->cInputSignature)
2437 {
2438 Log6(("Input signatures:\n"));
2439 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2440 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2441 i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask,
2442 pInfo->aInputSemantic[i].pcszSemanticName, pInfo->aInputSemantic[i].SemanticIndex));
2443 }
2444 if (pInfo->cOutputSignature)
2445 {
2446 Log6(("Output signatures:\n"));
2447 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2448 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2449 i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask,
2450 pInfo->aOutputSemantic[i].pcszSemanticName, pInfo->aOutputSemantic[i].SemanticIndex));
2451 }
2452 if (pInfo->cPatchConstantSignature)
2453 {
2454 Log6(("Patch constant signatures:\n"));
2455 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2456 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2457 i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask,
2458 pInfo->aPatchConstantSemantic[i].pcszSemanticName, pInfo->aPatchConstantSemantic[i].SemanticIndex));
2459 }
2460#endif
2461
2462 /* Blobs. */
2463 uint32_t iBlob = 0;
2464
2465 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2466 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_ISGN, pInfo->cInputSignature, &pInfo->aInputSignature[0], pInfo->aInputSemantic, w);
2467 AssertRCReturn(rc, rc);
2468
2469 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2470 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pInfo->aOutputSemantic, w);
2471 AssertRCReturn(rc, rc);
2472
2473 if ( pInfo->enmProgramType == VGPU10_HULL_SHADER
2474 || pInfo->enmProgramType == VGPU10_DOMAIN_SHADER)
2475 {
2476 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2477 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_PCSG, pInfo->cPatchConstantSignature, &pInfo->aPatchConstantSignature[0], pInfo->aPatchConstantSemantic, w);
2478 AssertRCReturn(rc, rc);
2479 }
2480
2481 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2482 rc = dxbcCreateSHDRBlob(pHdr, DXBC_BLOB_TYPE_SHDR, pvShader, cbShader, w);
2483 AssertRCReturn(rc, rc);
2484
2485 Assert(iBlob == cBlob);
2486
2487 AssertCompile(RT_UOFFSETOF(DXBCHeader, u32Version) == 0x14);
2488 dxbcHash(&pHdr->u32Version, pHdr->cbTotal - RT_UOFFSETOF(DXBCHeader, u32Version), pHdr->au8Hash);
2489
2490 return VINF_SUCCESS;
2491}
2492
2493
2494int DXShaderCreateDXBC(DXShaderInfo const *pInfo, void **ppvDXBC, uint32_t *pcbDXBC)
2495{
2496 /* Build DXBC container. */
2497 int rc;
2498 DXBCByteWriter dxbcByteWriter;
2499 DXBCByteWriter *w = &dxbcByteWriter;
2500 if (dxbcByteWriterInit(w, 4096 + pInfo->cbBytecode))
2501 {
2502 rc = dxbcCreateFromInfo(pInfo, pInfo->pvBytecode, pInfo->cbBytecode, w);
2503 if (RT_SUCCESS(rc))
2504 dxbcByteWriterFetchData(w, ppvDXBC, pcbDXBC);
2505 }
2506 else
2507 rc = VERR_NO_MEMORY;
2508 return rc;
2509}
2510
2511
2512static char const *dxbcGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, uint32_t u32BlobType,
2513 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature,
2514 SVGA3dDXSignatureSemanticName *pSemanticName)
2515{
2516 for (uint32_t i = 0; i < cSignature; ++i)
2517 {
2518 SVGA3dDXSignatureEntry const *p = &paSignature[i];
2519 if (p->registerIndex == idxRegister)
2520 {
2521 AssertReturn(p->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, NULL);
2522 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, p->semanticName, u32BlobType);
2523 *pSemanticName = p->semanticName;
2524 return pSemanticInfo->pszName;
2525 }
2526 }
2527 return NULL;
2528}
2529
2530char const *DXShaderGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, SVGA3dDXSignatureSemanticName *pSemanticName)
2531{
2532 return dxbcGetOutputSemanticName(pInfo, idxRegister, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pSemanticName);
2533}
2534
2535VGPU10_RESOURCE_RETURN_TYPE DXShaderResourceReturnTypeFromFormat(SVGA3dSurfaceFormat format)
2536{
2537 /** @todo This is auto-generated from format names and needs a review. */
2538 switch (format)
2539 {
2540 case SVGA3D_R32G32B32A32_UINT: return VGPU10_RETURN_TYPE_UINT;
2541 case SVGA3D_R32G32B32A32_SINT: return VGPU10_RETURN_TYPE_SINT;
2542 case SVGA3D_R32G32B32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2543 case SVGA3D_R32G32B32_UINT: return VGPU10_RETURN_TYPE_UINT;
2544 case SVGA3D_R32G32B32_SINT: return VGPU10_RETURN_TYPE_SINT;
2545 case SVGA3D_R16G16B16A16_UINT: return VGPU10_RETURN_TYPE_UINT;
2546 case SVGA3D_R16G16B16A16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2547 case SVGA3D_R16G16B16A16_SINT: return VGPU10_RETURN_TYPE_SINT;
2548 case SVGA3D_R32G32_UINT: return VGPU10_RETURN_TYPE_UINT;
2549 case SVGA3D_R32G32_SINT: return VGPU10_RETURN_TYPE_SINT;
2550 case SVGA3D_D32_FLOAT_S8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2551 case SVGA3D_R32_FLOAT_X8X24: return VGPU10_RETURN_TYPE_FLOAT;
2552 case SVGA3D_X32_G8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2553 case SVGA3D_R10G10B10A2_UINT: return VGPU10_RETURN_TYPE_UINT;
2554 case SVGA3D_R11G11B10_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2555 case SVGA3D_R8G8B8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2556 case SVGA3D_R8G8B8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2557 case SVGA3D_R8G8B8A8_UINT: return VGPU10_RETURN_TYPE_UINT;
2558 case SVGA3D_R8G8B8A8_SINT: return VGPU10_RETURN_TYPE_SINT;
2559 case SVGA3D_R16G16_UINT: return VGPU10_RETURN_TYPE_UINT;
2560 case SVGA3D_R16G16_SINT: return VGPU10_RETURN_TYPE_SINT;
2561 case SVGA3D_D32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2562 case SVGA3D_R32_UINT: return VGPU10_RETURN_TYPE_UINT;
2563 case SVGA3D_R32_SINT: return VGPU10_RETURN_TYPE_SINT;
2564 case SVGA3D_D24_UNORM_S8_UINT: return VGPU10_RETURN_TYPE_UNORM;
2565 case SVGA3D_R24_UNORM_X8: return VGPU10_RETURN_TYPE_UNORM;
2566 case SVGA3D_X24_G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2567 case SVGA3D_R8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2568 case SVGA3D_R8G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2569 case SVGA3D_R8G8_SINT: return VGPU10_RETURN_TYPE_SINT;
2570 case SVGA3D_R16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2571 case SVGA3D_R16_UINT: return VGPU10_RETURN_TYPE_UINT;
2572 case SVGA3D_R16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2573 case SVGA3D_R16_SINT: return VGPU10_RETURN_TYPE_SINT;
2574 case SVGA3D_R8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2575 case SVGA3D_R8_UINT: return VGPU10_RETURN_TYPE_UINT;
2576 case SVGA3D_R8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2577 case SVGA3D_R8_SINT: return VGPU10_RETURN_TYPE_SINT;
2578 case SVGA3D_R8G8_B8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2579 case SVGA3D_G8R8_G8B8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2580 case SVGA3D_BC1_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2581 case SVGA3D_BC2_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2582 case SVGA3D_BC3_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2583 case SVGA3D_BC4_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2584 case SVGA3D_BC5_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2585 case SVGA3D_R10G10B10_XR_BIAS_A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2586 case SVGA3D_B8G8R8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2587 case SVGA3D_B8G8R8X8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2588 case SVGA3D_R32G32B32A32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2589 case SVGA3D_R16G16B16A16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2590 case SVGA3D_R16G16B16A16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2591 case SVGA3D_R32G32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2592 case SVGA3D_R10G10B10A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2593 case SVGA3D_R8G8B8A8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2594 case SVGA3D_R16G16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2595 case SVGA3D_R16G16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2596 case SVGA3D_R16G16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2597 case SVGA3D_R32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2598 case SVGA3D_R8G8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2599 case SVGA3D_R16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2600 case SVGA3D_D16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2601 case SVGA3D_A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2602 case SVGA3D_BC1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2603 case SVGA3D_BC2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2604 case SVGA3D_BC3_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2605 case SVGA3D_B5G6R5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2606 case SVGA3D_B5G5R5A1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2607 case SVGA3D_B8G8R8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2608 case SVGA3D_B8G8R8X8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2609 case SVGA3D_BC4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2610 case SVGA3D_BC5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2611 case SVGA3D_B4G4R4A4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2612 case SVGA3D_BC7_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2613 case SVGA3D_BC7_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2614 case SVGA3D_R9G9B9E5_SHAREDEXP: return VGPU10_RETURN_TYPE_FLOAT;
2615 default:
2616 break;
2617 }
2618 return VGPU10_RETURN_TYPE_UNORM;
2619}
2620
2621
2622SVGA3dDXSignatureRegisterComponentType DXShaderComponentTypeFromFormat(SVGA3dSurfaceFormat format)
2623{
2624 /** @todo This is auto-generated from format names and needs a review. */
2625 switch (format)
2626 {
2627 case SVGA3D_R32G32B32A32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2628 case SVGA3D_R32G32B32A32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2629 case SVGA3D_R32G32B32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2630 case SVGA3D_R32G32B32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2631 case SVGA3D_R32G32B32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2632 case SVGA3D_R16G16B16A16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2633 case SVGA3D_R16G16B16A16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2634 case SVGA3D_R16G16B16A16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2635 case SVGA3D_R32G32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2636 case SVGA3D_R32G32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2637 case SVGA3D_D32_FLOAT_S8X24_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2638 case SVGA3D_R32_FLOAT_X8X24: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2639 case SVGA3D_X32_G8X24_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2640 case SVGA3D_R10G10B10A2_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2641 case SVGA3D_R11G11B10_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2642 case SVGA3D_R8G8B8A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2643 case SVGA3D_R8G8B8A8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2644 case SVGA3D_R8G8B8A8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2645 case SVGA3D_R8G8B8A8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2646 case SVGA3D_R16G16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2647 case SVGA3D_R16G16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2648 case SVGA3D_D32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2649 case SVGA3D_R32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2650 case SVGA3D_R32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2651 case SVGA3D_D24_UNORM_S8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2652 case SVGA3D_R24_UNORM_X8: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2653 case SVGA3D_X24_G8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2654 case SVGA3D_R8G8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2655 case SVGA3D_R8G8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2656 case SVGA3D_R8G8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2657 case SVGA3D_R16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2658 case SVGA3D_R16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2659 case SVGA3D_R16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2660 case SVGA3D_R16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2661 case SVGA3D_R8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2662 case SVGA3D_R8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2663 case SVGA3D_R8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2664 case SVGA3D_R8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2665 case SVGA3D_R8G8_B8G8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2666 case SVGA3D_G8R8_G8B8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2667 case SVGA3D_BC1_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2668 case SVGA3D_BC2_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2669 case SVGA3D_BC3_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2670 case SVGA3D_BC4_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2671 case SVGA3D_BC5_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2672 case SVGA3D_R10G10B10_XR_BIAS_A2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2673 case SVGA3D_B8G8R8A8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2674 case SVGA3D_B8G8R8X8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2675 case SVGA3D_R32G32B32A32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2676 case SVGA3D_R16G16B16A16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2677 case SVGA3D_R16G16B16A16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2678 case SVGA3D_R32G32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2679 case SVGA3D_R10G10B10A2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2680 case SVGA3D_R8G8B8A8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2681 case SVGA3D_R16G16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2682 case SVGA3D_R16G16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2683 case SVGA3D_R16G16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2684 case SVGA3D_R32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2685 case SVGA3D_R8G8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2686 case SVGA3D_R16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2687 case SVGA3D_D16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2688 case SVGA3D_A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2689 case SVGA3D_BC1_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2690 case SVGA3D_BC2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2691 case SVGA3D_BC3_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2692 case SVGA3D_B5G6R5_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2693 case SVGA3D_B5G5R5A1_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2694 case SVGA3D_B8G8R8A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2695 case SVGA3D_B8G8R8X8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2696 case SVGA3D_BC4_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2697 case SVGA3D_BC5_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2698 case SVGA3D_B4G4R4A4_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2699 case SVGA3D_BC7_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2700 case SVGA3D_BC7_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2701 case SVGA3D_R9G9B9E5_SHAREDEXP: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2702 default:
2703 break;
2704 }
2705 return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2706}
2707
2708
2709int DXShaderUpdateResources(DXShaderInfo const *pInfo, VGPU10_RESOURCE_DIMENSION *paResourceDimension,
2710 VGPU10_RESOURCE_RETURN_TYPE *paResourceReturnType, uint32_t cResources)
2711{
2712 for (uint32_t i = 0; i < pInfo->cDclResource; ++i)
2713 {
2714 VGPU10_RESOURCE_DIMENSION const resourceDimension = i < cResources ? paResourceDimension[i] : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2715 AssertContinue(resourceDimension <= VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
2716
2717 VGPU10_RESOURCE_RETURN_TYPE const resourceReturnType = i < cResources ? paResourceReturnType[i] : VGPU10_RETURN_TYPE_FLOAT;
2718 AssertContinue(resourceReturnType <= VGPU10_RETURN_TYPE_MIXED);
2719
2720 uint32_t const offToken = pInfo->aOffDclResource[i];
2721 AssertContinue(offToken < pInfo->cbBytecode);
2722 if (offToken == 0) /* nothing at this index */
2723 continue;
2724
2725 uint32_t *paToken = (uint32_t *)((uintptr_t)pInfo->pvBytecode + offToken);
2726
2727 VGPU10OpcodeToken0 *pOpcode = (VGPU10OpcodeToken0 *)&paToken[0];
2728 pOpcode->resourceDimension = resourceDimension;
2729 // paToken[1] unmodified
2730 // paToken[2] unmodified
2731 VGPU10ResourceReturnTypeToken *pReturnTypeToken = (VGPU10ResourceReturnTypeToken *)&paToken[3];
2732 pReturnTypeToken->component0 = (uint8_t)resourceReturnType;
2733 pReturnTypeToken->component1 = (uint8_t)resourceReturnType;
2734 pReturnTypeToken->component2 = (uint8_t)resourceReturnType;
2735 pReturnTypeToken->component3 = (uint8_t)resourceReturnType;
2736 }
2737
2738 return VINF_SUCCESS;
2739}
2740
2741#ifdef DXBC_STANDALONE_TEST
2742static int dxbcCreateFromBytecode(void const *pvShaderCode, uint32_t cbShaderCode, void **ppvDXBC, uint32_t *pcbDXBC)
2743{
2744 /* Parse the shader bytecode and create DXBC container with resource, signature and shader bytecode blobs. */
2745 DXShaderInfo info;
2746 RT_ZERO(info);
2747 int rc = DXShaderParse(pvShaderCode, cbShaderCode, &info);
2748 if (RT_SUCCESS(rc))
2749 rc = DXShaderCreateDXBC(&info, ppvDXBC, pcbDXBC);
2750 return rc;
2751}
2752
2753static int parseShaderVM(void const *pvShaderCode, uint32_t cbShaderCode)
2754{
2755 void *pv = NULL;
2756 uint32_t cb = 0;
2757 int rc = dxbcCreateFromBytecode(pvShaderCode, cbShaderCode, &pv, &cb);
2758 if (RT_SUCCESS(rc))
2759 {
2760 /* Hexdump DXBC */
2761 printf("{\n");
2762 uint8_t *pu8 = (uint8_t *)pv;
2763 for (uint32_t i = 0; i < cb; ++i)
2764 {
2765 if ((i % 16) == 0)
2766 {
2767 if (i > 0)
2768 printf(",\n");
2769
2770 printf(" 0x%02x", pu8[i]);
2771 }
2772 else
2773 {
2774 printf(", 0x%02x", pu8[i]);
2775 }
2776 }
2777 printf("\n");
2778 printf("};\n");
2779
2780 RTMemFree(pv);
2781 }
2782
2783 return rc;
2784}
2785
2786static DXBCBlobHeader *dxbcFindBlob(DXBCHeader *pDXBCHeader, uint32_t u32BlobType)
2787{
2788 uint8_t const *pu8DXBCBegin = (uint8_t *)pDXBCHeader;
2789 for (uint32_t i = 0; i < pDXBCHeader->cBlob; ++i)
2790 {
2791 DXBCBlobHeader *pCurrentBlob = (DXBCBlobHeader *)&pu8DXBCBegin[pDXBCHeader->aBlobOffset[i]];
2792 if (pCurrentBlob->u32BlobType == u32BlobType)
2793 return pCurrentBlob;
2794 }
2795 return NULL;
2796}
2797
2798static int dxbcExtractShaderCode(DXBCHeader *pDXBCHeader, void **ppvCode, uint32_t *pcbCode)
2799{
2800 DXBCBlobHeader *pBlob = dxbcFindBlob(pDXBCHeader, DXBC_BLOB_TYPE_SHDR);
2801 AssertReturn(pBlob, VERR_NOT_IMPLEMENTED);
2802
2803 DXBCBlobSHDR *pSHDR = (DXBCBlobSHDR *)&pBlob[1];
2804 *pcbCode = pSHDR->cToken * 4;
2805 *ppvCode = RTMemAlloc(*pcbCode);
2806 AssertReturn(*ppvCode, VERR_NO_MEMORY);
2807
2808 memcpy(*ppvCode, pSHDR, *pcbCode);
2809 return VINF_SUCCESS;
2810}
2811
2812static int parseShaderDXBC(void const *pvDXBC)
2813{
2814 DXBCHeader *pDXBCHeader = (DXBCHeader *)pvDXBC;
2815 void *pvShaderCode = NULL;
2816 uint32_t cbShaderCode = 0;
2817 int rc = dxbcExtractShaderCode(pDXBCHeader, &pvShaderCode, &cbShaderCode);
2818 if (RT_SUCCESS(rc))
2819 {
2820 rc = parseShaderVM(pvShaderCode, cbShaderCode);
2821 RTMemFree(pvShaderCode);
2822 }
2823 return rc;
2824}
2825#endif /* DXBC_STANDALONE_TEST */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette