VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/DevVGA-SVGA3d-dx-shader.cpp@95016

Last change on this file since 95016 was 95013, checked in by vboxsync, 3 years ago

Devices/Graphics: fix for shader signatures: bugref:9830

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 115.3 KB
 
1/* $Id: DevVGA-SVGA3d-dx-shader.cpp 95013 2022-05-15 08:04:44Z vboxsync $ */
2/** @file
3 * DevVMWare - VMWare SVGA device - VGPU10+ (DX) shader utilities.
4 */
5
6/*
7 * Copyright (C) 2020-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
23#include <VBox/AssertGuest.h>
24#include <VBox/log.h>
25
26#include <iprt/asm.h>
27#include <iprt/md5.h>
28#include <iprt/mem.h>
29#include <iprt/string.h>
30
31#include "DevVGA-SVGA3d-dx-shader.h"
32
33#ifdef RT_OS_WINDOWS
34#include <d3d11TokenizedProgramFormat.hpp>
35#else
36#define D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM 2
37#define D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE 3
38#endif
39
40/*
41 *
42 * DXBC shader binary format definitions.
43 *
44 */
45
46/* DXBC container header. */
47typedef struct DXBCHeader
48{
49 uint32_t u32DXBC; /* 0x43425844 = 'D', 'X', 'B', 'C' */
50 uint8_t au8Hash[16]; /* Modified MD5 hash. See dxbcHash. */
51 uint32_t u32Version; /* 1 */
52 uint32_t cbTotal; /* Total size in bytes. Including the header. */
53 uint32_t cBlob; /* Number of entries in aBlobOffset array. */
54 uint32_t aBlobOffset[1]; /* Offsets of blobs from the start of DXBC header. */
55} DXBCHeader;
56
57#define DXBC_MAGIC RT_MAKE_U32_FROM_U8('D', 'X', 'B', 'C')
58
59/* DXBC blob header. */
60typedef struct DXBCBlobHeader
61{
62 uint32_t u32BlobType; /* FourCC code. DXBC_BLOB_TYPE_* */
63 uint32_t cbBlob; /* Size of the blob excluding the blob header. 4 bytes aligned. */
64 /* Followed by the blob's data. */
65} DXBCBlobHeader;
66
67/* DXBC blob types. */
68#define DXBC_BLOB_TYPE_ISGN RT_MAKE_U32_FROM_U8('I', 'S', 'G', 'N')
69#define DXBC_BLOB_TYPE_OSGN RT_MAKE_U32_FROM_U8('O', 'S', 'G', 'N')
70#define DXBC_BLOB_TYPE_SHDR RT_MAKE_U32_FROM_U8('S', 'H', 'D', 'R')
71/** @todo More... */
72
73/* 'SHDR' blob data format. */
74typedef struct DXBCBlobSHDR
75{
76 VGPU10ProgramToken programToken;
77 uint32_t cToken; /* Number of 32 bit tokens including programToken and cToken. */
78 uint32_t au32Token[1]; /* cToken - 2 number of tokens. */
79} DXBCBlobSHDR;
80
81/* Element of an input or output signature. */
82typedef struct DXBCBlobIOSGNElement
83{
84 uint32_t offElementName; /* Offset of the semantic's name relative to the start of the blob data. */
85 uint32_t idxSemantic; /* Semantic index. */
86 uint32_t enmSystemValue; /* SVGA3dDXSignatureSemanticName */
87 uint32_t enmComponentType; /* 1 - unsigned, 2 - integer, 3 - float. */
88 uint32_t idxRegister; /* Shader register index. Elements must be sorted by register index. */
89 union
90 {
91 struct
92 {
93 uint32_t mask : 8; /* Component mask. Lower 4 bits represent X, Y, Z, W channels. */
94 uint32_t mask2 : 8; /* Which components are used in the shader. */
95 uint32_t pad : 16;
96 } m;
97 uint32_t mask;
98 } u;
99} DXBCBlobIOSGNElement;
100
101/* 'ISGN' and 'OSGN' blob data format. */
102typedef struct DXBCBlobIOSGN
103{
104 uint32_t cElement; /* Number of signature elements. */
105 uint32_t offElement; /* Offset of the first element from the start of the blob. Equals to 8. */
106 DXBCBlobIOSGNElement aElement[1]; /* Signature elements. Size is cElement. */
107 /* Followed by ASCIIZ semantic names. */
108} DXBCBlobIOSGN;
109
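/*
 * A minimal sketch of how the DXBC container structures above fit together:
 * dxbcExampleFindBlob is a hypothetical helper (not part of this file) that walks
 * the aBlobOffset directory of a complete container and returns the blob with the
 * requested FourCC, e.g. DXBC_BLOB_TYPE_SHDR.  A real caller must additionally
 * bounds-check cBlob and every offset against cbTotal.
 */
#if 0
static DXBCBlobHeader const *dxbcExampleFindBlob(void const *pvDXBC, uint32_t u32BlobType)
{
    DXBCHeader const *pHdr = (DXBCHeader const *)pvDXBC;
    if (pHdr->u32DXBC != DXBC_MAGIC)
        return NULL;                               /* Not a DXBC container. */
    for (uint32_t i = 0; i < pHdr->cBlob; ++i)
    {
        /* Blob offsets are relative to the start of the DXBC header. */
        DXBCBlobHeader const *pBlob = (DXBCBlobHeader const *)((uint8_t const *)pvDXBC + pHdr->aBlobOffset[i]);
        if (pBlob->u32BlobType == u32BlobType)
            return pBlob;                          /* cbBlob bytes of data follow the blob header. */
    }
    return NULL;
}
#endif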
110
111/*
112 * VGPU10 shader parser definitions.
113 */
114
115/* Parsed info about an operand index. */
116typedef struct VGPUOperandIndex
117{
118 uint32_t indexRepresentation; /* VGPU10_OPERAND_INDEX_REPRESENTATION */
119 uint64_t iOperandImmediate; /* Needs up to a qword. */
120 struct VGPUOperand *pOperandRelative; /* For VGPU10_OPERAND_INDEX_*RELATIVE */
121} VGPUOperandIndex;
122
123/* Parsed info about an operand. */
124typedef struct VGPUOperand
125{
126 uint32_t numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */
127 uint32_t selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
128 uint32_t mask : 4; /* 4-bits X, Y, Z, W mask for VGPU10_OPERAND_4_COMPONENT_MASK_MODE. */
129 uint32_t operandType : 8; /* VGPU10_OPERAND_TYPE */
130 uint32_t indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */
131 VGPUOperandIndex aOperandIndex[VGPU10_OPERAND_INDEX_3D]; /* Up to 3. */
132 uint32_t aImm[4]; /* Immediate values for VGPU10_OPERAND_TYPE_IMMEDIATE* */
133 uint32_t cOperandToken; /* Number of tokens in this operand. */
134 uint32_t const *paOperandToken; /* Pointer to operand tokens in the input buffer. */
135} VGPUOperand;
136
137/* Parsed info about an opcode. */
138typedef struct VGPUOpcode
139{
140 uint32_t cOpcodeToken; /* Number of tokens for this operation. */
141 uint32_t opcodeType; /* VGPU10_OPCODE_* */
142 uint32_t opcodeSubtype; /* For example VGPU10_VMWARE_OPCODE_* */
143 uint32_t semanticName; /* SVGA3dDXSignatureSemanticName for system value declarations. */
144 uint32_t cOperand; /* Number of operands for this instruction. */
145 uint32_t aIdxOperand[8]; /* Indices of the instruction operands in the aValOperand array. */
146 /* 8 should be enough for everyone. */
147 VGPUOperand aValOperand[16]; /* Operands including VGPU10_OPERAND_INDEX_*RELATIVE if they are used: */
148 /* Operand1, VGPU10_OPERAND_INDEX_*RELATIVE for Operand1, ... */
149 /* ... */
150 /* OperandN, VGPU10_OPERAND_INDEX_*RELATIVE for OperandN, ... */
151 /* 16 probably should be enough for everyone. */
152 uint32_t const *paOpcodeToken; /* Pointer to opcode tokens in the input buffer. */
153} VGPUOpcode;
154
155typedef struct VGPUOpcodeInfo
156{
157 uint32_t cOperand; /* Number of operands for this opcode. */
158} VGPUOpcodeInfo;
159
160static VGPUOpcodeInfo const g_aOpcodeInfo[] =
161{
162 { 3 }, /* VGPU10_OPCODE_ADD */
163 { 3 }, /* VGPU10_OPCODE_AND */
164 { 0 }, /* VGPU10_OPCODE_BREAK */
165 { 1 }, /* VGPU10_OPCODE_BREAKC */
166 { 1 }, /* VGPU10_OPCODE_CALL */
167 { 2 }, /* VGPU10_OPCODE_CALLC */
168 { 1 }, /* VGPU10_OPCODE_CASE */
169 { 0 }, /* VGPU10_OPCODE_CONTINUE */
170 { 1 }, /* VGPU10_OPCODE_CONTINUEC */
171 { 0 }, /* VGPU10_OPCODE_CUT */
172 { 0 }, /* VGPU10_OPCODE_DEFAULT */
173 { 2 }, /* VGPU10_OPCODE_DERIV_RTX */
174 { 2 }, /* VGPU10_OPCODE_DERIV_RTY */
175 { 1 }, /* VGPU10_OPCODE_DISCARD */
176 { 3 }, /* VGPU10_OPCODE_DIV */
177 { 3 }, /* VGPU10_OPCODE_DP2 */
178 { 3 }, /* VGPU10_OPCODE_DP3 */
179 { 3 }, /* VGPU10_OPCODE_DP4 */
180 { 0 }, /* VGPU10_OPCODE_ELSE */
181 { 0 }, /* VGPU10_OPCODE_EMIT */
182 { 0 }, /* VGPU10_OPCODE_EMITTHENCUT */
183 { 0 }, /* VGPU10_OPCODE_ENDIF */
184 { 0 }, /* VGPU10_OPCODE_ENDLOOP */
185 { 0 }, /* VGPU10_OPCODE_ENDSWITCH */
186 { 3 }, /* VGPU10_OPCODE_EQ */
187 { 2 }, /* VGPU10_OPCODE_EXP */
188 { 2 }, /* VGPU10_OPCODE_FRC */
189 { 2 }, /* VGPU10_OPCODE_FTOI */
190 { 2 }, /* VGPU10_OPCODE_FTOU */
191 { 3 }, /* VGPU10_OPCODE_GE */
192 { 3 }, /* VGPU10_OPCODE_IADD */
193 { 1 }, /* VGPU10_OPCODE_IF */
194 { 3 }, /* VGPU10_OPCODE_IEQ */
195 { 3 }, /* VGPU10_OPCODE_IGE */
196 { 3 }, /* VGPU10_OPCODE_ILT */
197 { 4 }, /* VGPU10_OPCODE_IMAD */
198 { 3 }, /* VGPU10_OPCODE_IMAX */
199 { 3 }, /* VGPU10_OPCODE_IMIN */
200 { 4 }, /* VGPU10_OPCODE_IMUL */
201 { 3 }, /* VGPU10_OPCODE_INE */
202 { 2 }, /* VGPU10_OPCODE_INEG */
203 { 3 }, /* VGPU10_OPCODE_ISHL */
204 { 3 }, /* VGPU10_OPCODE_ISHR */
205 { 2 }, /* VGPU10_OPCODE_ITOF */
206 { 1 }, /* VGPU10_OPCODE_LABEL */
207 { 3 }, /* VGPU10_OPCODE_LD */
208 { 4 }, /* VGPU10_OPCODE_LD_MS */
209 { 2 }, /* VGPU10_OPCODE_LOG */
210 { 0 }, /* VGPU10_OPCODE_LOOP */
211 { 3 }, /* VGPU10_OPCODE_LT */
212 { 4 }, /* VGPU10_OPCODE_MAD */
213 { 3 }, /* VGPU10_OPCODE_MIN */
214 { 3 }, /* VGPU10_OPCODE_MAX */
215 { UINT32_MAX }, /* VGPU10_OPCODE_CUSTOMDATA: special opcode */
216 { 2 }, /* VGPU10_OPCODE_MOV */
217 { 4 }, /* VGPU10_OPCODE_MOVC */
218 { 3 }, /* VGPU10_OPCODE_MUL */
219 { 3 }, /* VGPU10_OPCODE_NE */
220 { 0 }, /* VGPU10_OPCODE_NOP */
221 { 2 }, /* VGPU10_OPCODE_NOT */
222 { 3 }, /* VGPU10_OPCODE_OR */
223 { 3 }, /* VGPU10_OPCODE_RESINFO */
224 { 0 }, /* VGPU10_OPCODE_RET */
225 { 1 }, /* VGPU10_OPCODE_RETC */
226 { 2 }, /* VGPU10_OPCODE_ROUND_NE */
227 { 2 }, /* VGPU10_OPCODE_ROUND_NI */
228 { 2 }, /* VGPU10_OPCODE_ROUND_PI */
229 { 2 }, /* VGPU10_OPCODE_ROUND_Z */
230 { 2 }, /* VGPU10_OPCODE_RSQ */
231 { 4 }, /* VGPU10_OPCODE_SAMPLE */
232 { 5 }, /* VGPU10_OPCODE_SAMPLE_C */
233 { 5 }, /* VGPU10_OPCODE_SAMPLE_C_LZ */
234 { 5 }, /* VGPU10_OPCODE_SAMPLE_L */
235 { 6 }, /* VGPU10_OPCODE_SAMPLE_D */
236 { 5 }, /* VGPU10_OPCODE_SAMPLE_B */
237 { 2 }, /* VGPU10_OPCODE_SQRT */
238 { 1 }, /* VGPU10_OPCODE_SWITCH */
239 { 3 }, /* VGPU10_OPCODE_SINCOS */
240 { 4 }, /* VGPU10_OPCODE_UDIV */
241 { 3 }, /* VGPU10_OPCODE_ULT */
242 { 3 }, /* VGPU10_OPCODE_UGE */
243 { 4 }, /* VGPU10_OPCODE_UMUL */
244 { 4 }, /* VGPU10_OPCODE_UMAD */
245 { 3 }, /* VGPU10_OPCODE_UMAX */
246 { 3 }, /* VGPU10_OPCODE_UMIN */
247 { 3 }, /* VGPU10_OPCODE_USHR */
248 { 2 }, /* VGPU10_OPCODE_UTOF */
249 { 3 }, /* VGPU10_OPCODE_XOR */
250 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE */
251 { 1 }, /* VGPU10_OPCODE_DCL_CONSTANT_BUFFER */
252 { 1 }, /* VGPU10_OPCODE_DCL_SAMPLER */
253 { 1 }, /* VGPU10_OPCODE_DCL_INDEX_RANGE */
254 { 0 }, /* VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY */
255 { 0 }, /* VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE */
256 { 0 }, /* VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT */
257 { 1 }, /* VGPU10_OPCODE_DCL_INPUT */
258 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SGV */
259 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SIV */
260 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS */
261 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SGV */
262 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SIV */
263 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT */
264 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SGV */
265 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SIV */
266 { 0 }, /* VGPU10_OPCODE_DCL_TEMPS */
267 { 0 }, /* VGPU10_OPCODE_DCL_INDEXABLE_TEMP */
268 { 0 }, /* VGPU10_OPCODE_DCL_GLOBAL_FLAGS */
269 { UINT32_MAX }, /* VGPU10_OPCODE_VMWARE: special opcode */
270 { 4 }, /* VGPU10_OPCODE_LOD */
271 { 4 }, /* VGPU10_OPCODE_GATHER4 */
272 { 3 }, /* VGPU10_OPCODE_SAMPLE_POS */
273 { 2 }, /* VGPU10_OPCODE_SAMPLE_INFO */
274 { UINT32_MAX }, /* VGPU10_OPCODE_RESERVED1: special opcode */
275 { 0 }, /* VGPU10_OPCODE_HS_DECLS */
276 { 0 }, /* VGPU10_OPCODE_HS_CONTROL_POINT_PHASE */
277 { 0 }, /* VGPU10_OPCODE_HS_FORK_PHASE */
278 { 0 }, /* VGPU10_OPCODE_HS_JOIN_PHASE */
279 { 1 }, /* VGPU10_OPCODE_EMIT_STREAM */
280 { 1 }, /* VGPU10_OPCODE_CUT_STREAM */
281 { 1 }, /* VGPU10_OPCODE_EMITTHENCUT_STREAM */
282 { 1 }, /* VGPU10_OPCODE_INTERFACE_CALL */
283 { 2 }, /* VGPU10_OPCODE_BUFINFO */
284 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_COARSE */
285 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_FINE */
286 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_COARSE */
287 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_FINE */
288 { 5 }, /* VGPU10_OPCODE_GATHER4_C */
289 { 5 }, /* VGPU10_OPCODE_GATHER4_PO */
290 { 6 }, /* VGPU10_OPCODE_GATHER4_PO_C */
291 { 2 }, /* VGPU10_OPCODE_RCP */
292 { 2 }, /* VGPU10_OPCODE_F32TOF16 */
293 { 2 }, /* VGPU10_OPCODE_F16TOF32 */
294 { 4 }, /* VGPU10_OPCODE_UADDC */
295 { 4 }, /* VGPU10_OPCODE_USUBB */
296 { 2 }, /* VGPU10_OPCODE_COUNTBITS */
297 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_HI */
298 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_LO */
299 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_SHI */
300 { 4 }, /* VGPU10_OPCODE_UBFE */
301 { 4 }, /* VGPU10_OPCODE_IBFE */
302 { 5 }, /* VGPU10_OPCODE_BFI */
303 { 2 }, /* VGPU10_OPCODE_BFREV */
304 { 5 }, /* VGPU10_OPCODE_SWAPC */
305 { 1 }, /* VGPU10_OPCODE_DCL_STREAM */
306 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_BODY */
307 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_TABLE */
308 { 0 }, /* VGPU10_OPCODE_DCL_INTERFACE */
309 { 0 }, /* VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT */
310 { 0 }, /* VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT */
311 { 0 }, /* VGPU10_OPCODE_DCL_TESS_DOMAIN */
312 { 0 }, /* VGPU10_OPCODE_DCL_TESS_PARTITIONING */
313 { 0 }, /* VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE */
314 { 0 }, /* VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR */
315 { 0 }, /* VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
316 { 0 }, /* VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
317 { 0 }, /* VGPU10_OPCODE_DCL_THREAD_GROUP */
318 { 1 }, /* VGPU10_OPCODE_DCL_UAV_TYPED */
319 { 1 }, /* VGPU10_OPCODE_DCL_UAV_RAW */
320 { 1 }, /* VGPU10_OPCODE_DCL_UAV_STRUCTURED */
321 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_RAW */
322 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_STRUCTURED */
323 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_RAW */
324 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED */
325 { 3 }, /* VGPU10_OPCODE_LD_UAV_TYPED */
326 { 3 }, /* VGPU10_OPCODE_STORE_UAV_TYPED */
327 { 3 }, /* VGPU10_OPCODE_LD_RAW */
328 { 3 }, /* VGPU10_OPCODE_STORE_RAW */
329 { 4 }, /* VGPU10_OPCODE_LD_STRUCTURED */
330 { 4 }, /* VGPU10_OPCODE_STORE_STRUCTURED */
331 { 3 }, /* VGPU10_OPCODE_ATOMIC_AND */
332 { 3 }, /* VGPU10_OPCODE_ATOMIC_OR */
333 { 3 }, /* VGPU10_OPCODE_ATOMIC_XOR */
334 { 4 }, /* VGPU10_OPCODE_ATOMIC_CMP_STORE */
335 { 3 }, /* VGPU10_OPCODE_ATOMIC_IADD */
336 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMAX */
337 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMIN */
338 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMAX */
339 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMIN */
340 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_ALLOC */
341 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_CONSUME */
342 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IADD */
343 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_AND */
344 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_OR */
345 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_XOR */
346 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_EXCH */
347 { 5 }, /* VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH */
348 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMAX */
349 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMIN */
350 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMAX */
351 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMIN */
352 { 0 }, /* VGPU10_OPCODE_SYNC */
353 { 3 }, /* VGPU10_OPCODE_DADD */
354 { 3 }, /* VGPU10_OPCODE_DMAX */
355 { 3 }, /* VGPU10_OPCODE_DMIN */
356 { 3 }, /* VGPU10_OPCODE_DMUL */
357 { 3 }, /* VGPU10_OPCODE_DEQ */
358 { 3 }, /* VGPU10_OPCODE_DGE */
359 { 3 }, /* VGPU10_OPCODE_DLT */
360 { 3 }, /* VGPU10_OPCODE_DNE */
361 { 2 }, /* VGPU10_OPCODE_DMOV */
362 { 4 }, /* VGPU10_OPCODE_DMOVC */
363 { 2 }, /* VGPU10_OPCODE_DTOF */
364 { 2 }, /* VGPU10_OPCODE_FTOD */
365 { 3 }, /* VGPU10_OPCODE_EVAL_SNAPPED */
366 { 3 }, /* VGPU10_OPCODE_EVAL_SAMPLE_INDEX */
367 { 2 }, /* VGPU10_OPCODE_EVAL_CENTROID */
368 { 0 }, /* VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT */
369 { 0 }, /* VGPU10_OPCODE_ABORT */
370 { 0 }, /* VGPU10_OPCODE_DEBUG_BREAK */
371 { 0 }, /* VGPU10_OPCODE_RESERVED0 */
372 { 3 }, /* VGPU10_OPCODE_DDIV */
373 { 4 }, /* VGPU10_OPCODE_DFMA */
374 { 2 }, /* VGPU10_OPCODE_DRCP */
375 { 4 }, /* VGPU10_OPCODE_MSAD */
376 { 2 }, /* VGPU10_OPCODE_DTOI */
377 { 2 }, /* VGPU10_OPCODE_DTOU */
378 { 2 }, /* VGPU10_OPCODE_ITOD */
379 { 2 }, /* VGPU10_OPCODE_UTOD */
380};
381AssertCompile(RT_ELEMENTS(g_aOpcodeInfo) == VGPU10_NUM_OPCODES);
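/*
 * A minimal sketch of how the table above is meant to be consulted.  Entries holding
 * UINT32_MAX (CUSTOMDATA, VMWARE, RESERVED1) have no fixed operand count and are
 * handled specially by the parser below.  vgpu10ExampleOperandCount is a hypothetical
 * helper, shown only to make the table's contract explicit.
 */
#if 0
static uint32_t vgpu10ExampleOperandCount(uint32_t opcodeType)
{
    if (opcodeType >= RT_ELEMENTS(g_aOpcodeInfo))
        return UINT32_MAX;                         /* Unknown opcode: treat as special. */
    return g_aOpcodeInfo[opcodeType].cOperand;     /* UINT32_MAX for the special opcodes. */
}
#endif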
382
383#ifdef LOG_ENABLED
384/*
385 *
386 * Helpers to translate a VGPU10 shader constant to a string.
387 *
388 */
389
390#define SVGA_CASE_ID2STR(idx) case idx: return #idx
391
392static const char *dxbcOpcodeToString(uint32_t opcodeType)
393{
394 VGPU10_OPCODE_TYPE enm = (VGPU10_OPCODE_TYPE)opcodeType;
395 switch (enm)
396 {
397 SVGA_CASE_ID2STR(VGPU10_OPCODE_ADD);
398 SVGA_CASE_ID2STR(VGPU10_OPCODE_AND);
399 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAK);
400 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAKC);
401 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALL);
402 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALLC);
403 SVGA_CASE_ID2STR(VGPU10_OPCODE_CASE);
404 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUE);
405 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUEC);
406 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT);
407 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEFAULT);
408 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX);
409 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY);
410 SVGA_CASE_ID2STR(VGPU10_OPCODE_DISCARD);
411 SVGA_CASE_ID2STR(VGPU10_OPCODE_DIV);
412 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP2);
413 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP3);
414 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP4);
415 SVGA_CASE_ID2STR(VGPU10_OPCODE_ELSE);
416 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT);
417 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT);
418 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDIF);
419 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDLOOP);
420 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDSWITCH);
421 SVGA_CASE_ID2STR(VGPU10_OPCODE_EQ);
422 SVGA_CASE_ID2STR(VGPU10_OPCODE_EXP);
423 SVGA_CASE_ID2STR(VGPU10_OPCODE_FRC);
424 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOI);
425 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOU);
426 SVGA_CASE_ID2STR(VGPU10_OPCODE_GE);
427 SVGA_CASE_ID2STR(VGPU10_OPCODE_IADD);
428 SVGA_CASE_ID2STR(VGPU10_OPCODE_IF);
429 SVGA_CASE_ID2STR(VGPU10_OPCODE_IEQ);
430 SVGA_CASE_ID2STR(VGPU10_OPCODE_IGE);
431 SVGA_CASE_ID2STR(VGPU10_OPCODE_ILT);
432 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAD);
433 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAX);
434 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMIN);
435 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMUL);
436 SVGA_CASE_ID2STR(VGPU10_OPCODE_INE);
437 SVGA_CASE_ID2STR(VGPU10_OPCODE_INEG);
438 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHL);
439 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHR);
440 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOF);
441 SVGA_CASE_ID2STR(VGPU10_OPCODE_LABEL);
442 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD);
443 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_MS);
444 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOG);
445 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOOP);
446 SVGA_CASE_ID2STR(VGPU10_OPCODE_LT);
447 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAD);
448 SVGA_CASE_ID2STR(VGPU10_OPCODE_MIN);
449 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAX);
450 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUSTOMDATA);
451 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOV);
452 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOVC);
453 SVGA_CASE_ID2STR(VGPU10_OPCODE_MUL);
454 SVGA_CASE_ID2STR(VGPU10_OPCODE_NE);
455 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOP);
456 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOT);
457 SVGA_CASE_ID2STR(VGPU10_OPCODE_OR);
458 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESINFO);
459 SVGA_CASE_ID2STR(VGPU10_OPCODE_RET);
460 SVGA_CASE_ID2STR(VGPU10_OPCODE_RETC);
461 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NE);
462 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NI);
463 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_PI);
464 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_Z);
465 SVGA_CASE_ID2STR(VGPU10_OPCODE_RSQ);
466 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE);
467 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C);
468 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C_LZ);
469 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_L);
470 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_D);
471 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_B);
472 SVGA_CASE_ID2STR(VGPU10_OPCODE_SQRT);
473 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWITCH);
474 SVGA_CASE_ID2STR(VGPU10_OPCODE_SINCOS);
475 SVGA_CASE_ID2STR(VGPU10_OPCODE_UDIV);
476 SVGA_CASE_ID2STR(VGPU10_OPCODE_ULT);
477 SVGA_CASE_ID2STR(VGPU10_OPCODE_UGE);
478 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMUL);
479 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAD);
480 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAX);
481 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMIN);
482 SVGA_CASE_ID2STR(VGPU10_OPCODE_USHR);
483 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOF);
484 SVGA_CASE_ID2STR(VGPU10_OPCODE_XOR);
485 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE);
486 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_CONSTANT_BUFFER);
487 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_SAMPLER);
488 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEX_RANGE);
489 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY);
490 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE);
491 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT);
492 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT);
493 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SGV);
494 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SIV);
495 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS);
496 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SGV);
497 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SIV);
498 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT);
499 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SGV);
500 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SIV);
501 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TEMPS);
502 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEXABLE_TEMP);
503 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GLOBAL_FLAGS);
504 SVGA_CASE_ID2STR(VGPU10_OPCODE_VMWARE);
505 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOD);
506 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4);
507 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_POS);
508 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_INFO);
509 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED1);
510 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_DECLS);
511 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_CONTROL_POINT_PHASE);
512 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_FORK_PHASE);
513 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_JOIN_PHASE);
514 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT_STREAM);
515 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT_STREAM);
516 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT_STREAM);
517 SVGA_CASE_ID2STR(VGPU10_OPCODE_INTERFACE_CALL);
518 SVGA_CASE_ID2STR(VGPU10_OPCODE_BUFINFO);
519 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_COARSE);
520 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_FINE);
521 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_COARSE);
522 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_FINE);
523 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_C);
524 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO);
525 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO_C);
526 SVGA_CASE_ID2STR(VGPU10_OPCODE_RCP);
527 SVGA_CASE_ID2STR(VGPU10_OPCODE_F32TOF16);
528 SVGA_CASE_ID2STR(VGPU10_OPCODE_F16TOF32);
529 SVGA_CASE_ID2STR(VGPU10_OPCODE_UADDC);
530 SVGA_CASE_ID2STR(VGPU10_OPCODE_USUBB);
531 SVGA_CASE_ID2STR(VGPU10_OPCODE_COUNTBITS);
532 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_HI);
533 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_LO);
534 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_SHI);
535 SVGA_CASE_ID2STR(VGPU10_OPCODE_UBFE);
536 SVGA_CASE_ID2STR(VGPU10_OPCODE_IBFE);
537 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFI);
538 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFREV);
539 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWAPC);
540 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_STREAM);
541 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_BODY);
542 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_TABLE);
543 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INTERFACE);
544 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT);
545 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT);
546 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_DOMAIN);
547 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_PARTITIONING);
548 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE);
549 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR);
550 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT);
551 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT);
552 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_THREAD_GROUP);
553 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_TYPED);
554 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_RAW);
555 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_STRUCTURED);
556 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_RAW);
557 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_STRUCTURED);
558 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_RAW);
559 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED);
560 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_UAV_TYPED);
561 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_UAV_TYPED);
562 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_RAW);
563 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_RAW);
564 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_STRUCTURED);
565 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_STRUCTURED);
566 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_AND);
567 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_OR);
568 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_XOR);
569 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_CMP_STORE);
570 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IADD);
571 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMAX);
572 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMIN);
573 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMAX);
574 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMIN);
575 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_ALLOC);
576 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CONSUME);
577 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IADD);
578 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_AND);
579 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_OR);
580 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_XOR);
581 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_EXCH);
582 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
583 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMAX);
584 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMIN);
585 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMAX);
586 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMIN);
587 SVGA_CASE_ID2STR(VGPU10_OPCODE_SYNC);
588 SVGA_CASE_ID2STR(VGPU10_OPCODE_DADD);
589 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMAX);
590 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMIN);
591 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMUL);
592 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEQ);
593 SVGA_CASE_ID2STR(VGPU10_OPCODE_DGE);
594 SVGA_CASE_ID2STR(VGPU10_OPCODE_DLT);
595 SVGA_CASE_ID2STR(VGPU10_OPCODE_DNE);
596 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOV);
597 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOVC);
598 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOF);
599 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOD);
600 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SNAPPED);
601 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SAMPLE_INDEX);
602 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_CENTROID);
603 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT);
604 SVGA_CASE_ID2STR(VGPU10_OPCODE_ABORT);
605 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEBUG_BREAK);
606 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED0);
607 SVGA_CASE_ID2STR(VGPU10_OPCODE_DDIV);
608 SVGA_CASE_ID2STR(VGPU10_OPCODE_DFMA);
609 SVGA_CASE_ID2STR(VGPU10_OPCODE_DRCP);
610 SVGA_CASE_ID2STR(VGPU10_OPCODE_MSAD);
611 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOI);
612 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOU);
613 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOD);
614 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOD);
615 SVGA_CASE_ID2STR(VGPU10_NUM_OPCODES);
616 }
617 return NULL;
618}
619
620
621static const char *dxbcShaderTypeToString(uint32_t value)
622{
623 VGPU10_PROGRAM_TYPE enm = (VGPU10_PROGRAM_TYPE)value;
624 switch (enm)
625 {
626 SVGA_CASE_ID2STR(VGPU10_PIXEL_SHADER);
627 SVGA_CASE_ID2STR(VGPU10_VERTEX_SHADER);
628 SVGA_CASE_ID2STR(VGPU10_GEOMETRY_SHADER);
629 SVGA_CASE_ID2STR(VGPU10_HULL_SHADER);
630 SVGA_CASE_ID2STR(VGPU10_DOMAIN_SHADER);
631 SVGA_CASE_ID2STR(VGPU10_COMPUTE_SHADER);
632 }
633 return NULL;
634}
635
636
637static const char *dxbcCustomDataClassToString(uint32_t value)
638{
639 VGPU10_CUSTOMDATA_CLASS enm = (VGPU10_CUSTOMDATA_CLASS)value;
640 switch (enm)
641 {
642 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_COMMENT);
643 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DEBUGINFO);
644 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_OPAQUE);
645 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER);
646 }
647 return NULL;
648}
649
650
651static const char *dxbcSystemNameToString(uint32_t value)
652{
653 VGPU10_SYSTEM_NAME enm = (VGPU10_SYSTEM_NAME)value;
654 switch (enm)
655 {
656 SVGA_CASE_ID2STR(VGPU10_NAME_UNDEFINED);
657 SVGA_CASE_ID2STR(VGPU10_NAME_POSITION);
658 SVGA_CASE_ID2STR(VGPU10_NAME_CLIP_DISTANCE);
659 SVGA_CASE_ID2STR(VGPU10_NAME_CULL_DISTANCE);
660 SVGA_CASE_ID2STR(VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX);
661 SVGA_CASE_ID2STR(VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
662 SVGA_CASE_ID2STR(VGPU10_NAME_VERTEX_ID);
663 SVGA_CASE_ID2STR(VGPU10_NAME_PRIMITIVE_ID);
664 SVGA_CASE_ID2STR(VGPU10_NAME_INSTANCE_ID);
665 SVGA_CASE_ID2STR(VGPU10_NAME_IS_FRONT_FACE);
666 SVGA_CASE_ID2STR(VGPU10_NAME_SAMPLE_INDEX);
667 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR);
668 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR);
669 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR);
670 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR);
671 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
672 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
673 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR);
674 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR);
675 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR);
676 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
677 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
678 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
679 }
680 return NULL;
681}
682
683
684static const char *dxbcOperandTypeToString(uint32_t value)
685{
686 VGPU10_OPERAND_TYPE enm = (VGPU10_OPERAND_TYPE)value;
687 switch (enm)
688 {
689 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_TEMP);
690 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT);
691 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT);
692 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INDEXABLE_TEMP);
693 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE32);
694 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE64);
695 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_SAMPLER);
696 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RESOURCE);
697 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CONSTANT_BUFFER);
698 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER);
699 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_LABEL);
700 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
701 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH);
702 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_NULL);
703 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RASTERIZER);
704 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK);
705 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_STREAM);
706 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_BODY);
707 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_TABLE);
708 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INTERFACE);
709 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_INPUT);
710 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_OUTPUT);
711 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID);
712 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID);
713 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID);
714 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT);
715 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT);
716 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
717 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT);
718 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THIS_POINTER);
719 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_UAV);
720 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
721 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID);
722 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID);
723 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
724 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK);
725 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED);
726 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID);
727 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL);
728 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL);
729 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CYCLE_COUNTER);
730 SVGA_CASE_ID2STR(VGPU10_NUM_OPERANDS);
731 }
732 return NULL;
733}
734
735
736static const char *dxbcOperandNumComponentsToString(uint32_t value)
737{
738 VGPU10_OPERAND_NUM_COMPONENTS enm = (VGPU10_OPERAND_NUM_COMPONENTS)value;
739 switch (enm)
740 {
741 SVGA_CASE_ID2STR(VGPU10_OPERAND_0_COMPONENT);
742 SVGA_CASE_ID2STR(VGPU10_OPERAND_1_COMPONENT);
743 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT);
744 SVGA_CASE_ID2STR(VGPU10_OPERAND_N_COMPONENT);
745 }
746 return NULL;
747}
748
749
750static const char *dxbcOperandComponentModeToString(uint32_t value)
751{
752 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE enm = (VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE)value;
753 switch (enm)
754 {
755 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
756 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE);
757 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE);
758 }
759 return NULL;
760}
761
762
763static const char *dxbcOperandComponentNameToString(uint32_t value)
764{
765 VGPU10_COMPONENT_NAME enm = (VGPU10_COMPONENT_NAME)value;
766 switch (enm)
767 {
768 SVGA_CASE_ID2STR(VGPU10_COMPONENT_X);
769 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Y);
770 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Z);
771 SVGA_CASE_ID2STR(VGPU10_COMPONENT_W);
772 }
773 return NULL;
774}
775
776
777static const char *dxbcOperandIndexDimensionToString(uint32_t value)
778{
779 VGPU10_OPERAND_INDEX_DIMENSION enm = (VGPU10_OPERAND_INDEX_DIMENSION)value;
780 switch (enm)
781 {
782 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_0D);
783 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_1D);
784 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_2D);
785 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_3D);
786 }
787 return NULL;
788}
789
790
791static const char *dxbcOperandIndexRepresentationToString(uint32_t value)
792{
793 VGPU10_OPERAND_INDEX_REPRESENTATION enm = (VGPU10_OPERAND_INDEX_REPRESENTATION)value;
794 switch (enm)
795 {
796 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32);
797 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64);
798 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_RELATIVE);
799 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
800 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE);
801 }
802 return NULL;
803}
804
805
806static const char *dxbcInterpolationModeToString(uint32_t value)
807{
808 VGPU10_INTERPOLATION_MODE enm = (VGPU10_INTERPOLATION_MODE)value;
809 switch (enm)
810 {
811 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_UNDEFINED);
812 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_CONSTANT);
813 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR);
814 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_CENTROID);
815 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE);
816 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
817 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_SAMPLE);
818 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
819 }
820 return NULL;
821}
822
823
824static const char *dxbcResourceDimensionToString(uint32_t value)
825{
826 VGPU10_RESOURCE_DIMENSION enm = (VGPU10_RESOURCE_DIMENSION)value;
827 switch (enm)
828 {
829 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_UNKNOWN);
830 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_BUFFER);
831 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1D);
832 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2D);
833 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS);
834 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE3D);
835 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
836 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY);
837 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY);
838 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY);
839 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
840 }
841 return NULL;
842}
843
844
845static const char *dxbcVmwareOpcodeTypeToString(uint32_t value)
846{
847 VGPU10_VMWARE_OPCODE_TYPE enm = (VGPU10_VMWARE_OPCODE_TYPE)value;
848 switch (enm)
849 {
850 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_IDIV);
851 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DFRC);
852 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DRSQ);
853 SVGA_CASE_ID2STR(VGPU10_VMWARE_NUM_OPCODES);
854 }
855 return NULL;
856}
857
858#endif /* LOG_ENABLED */
859
860/*
861 * MD5 from IPRT (alt-md5.cpp) for DXBC hash calculation.
862 * DXBC hash function uses a different padding for the data, see dxbcHash.
863 * Therefore RTMd5Final is not needed. Two functions have been renamed: dxbcRTMd5Update dxbcRTMd5Init.
864 */
865
866
867/* The four core functions - F1 is optimized somewhat */
868/* #define F1(x, y, z) (x & y | ~x & z) */
869#define F1(x, y, z) (z ^ (x & (y ^ z)))
870#define F2(x, y, z) F1(z, x, y)
871#define F3(x, y, z) (x ^ y ^ z)
872#define F4(x, y, z) (y ^ (x | ~z))
873
874
875/* This is the central step in the MD5 algorithm. */
876#define MD5STEP(f, w, x, y, z, data, s) \
877 ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )
878
879
880/**
881 * The core of the MD5 algorithm, this alters an existing MD5 hash to reflect
882 * the addition of 16 longwords of new data. RTMd5Update blocks the data and
883 * converts bytes into longwords for this routine.
884 */
885static void rtMd5Transform(uint32_t buf[4], uint32_t const in[16])
886{
887 uint32_t a, b, c, d;
888
889 a = buf[0];
890 b = buf[1];
891 c = buf[2];
892 d = buf[3];
893
894 /* fn, w, x, y, z, data, s) */
895 MD5STEP(F1, a, b, c, d, in[ 0] + 0xd76aa478, 7);
896 MD5STEP(F1, d, a, b, c, in[ 1] + 0xe8c7b756, 12);
897 MD5STEP(F1, c, d, a, b, in[ 2] + 0x242070db, 17);
898 MD5STEP(F1, b, c, d, a, in[ 3] + 0xc1bdceee, 22);
899 MD5STEP(F1, a, b, c, d, in[ 4] + 0xf57c0faf, 7);
900 MD5STEP(F1, d, a, b, c, in[ 5] + 0x4787c62a, 12);
901 MD5STEP(F1, c, d, a, b, in[ 6] + 0xa8304613, 17);
902 MD5STEP(F1, b, c, d, a, in[ 7] + 0xfd469501, 22);
903 MD5STEP(F1, a, b, c, d, in[ 8] + 0x698098d8, 7);
904 MD5STEP(F1, d, a, b, c, in[ 9] + 0x8b44f7af, 12);
905 MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
906 MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
907 MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
908 MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
909 MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
910 MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
911
912 MD5STEP(F2, a, b, c, d, in[ 1] + 0xf61e2562, 5);
913 MD5STEP(F2, d, a, b, c, in[ 6] + 0xc040b340, 9);
914 MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
915 MD5STEP(F2, b, c, d, a, in[ 0] + 0xe9b6c7aa, 20);
916 MD5STEP(F2, a, b, c, d, in[ 5] + 0xd62f105d, 5);
917 MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
918 MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
919 MD5STEP(F2, b, c, d, a, in[ 4] + 0xe7d3fbc8, 20);
920 MD5STEP(F2, a, b, c, d, in[ 9] + 0x21e1cde6, 5);
921 MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
922 MD5STEP(F2, c, d, a, b, in[ 3] + 0xf4d50d87, 14);
923 MD5STEP(F2, b, c, d, a, in[ 8] + 0x455a14ed, 20);
924 MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
925 MD5STEP(F2, d, a, b, c, in[ 2] + 0xfcefa3f8, 9);
926 MD5STEP(F2, c, d, a, b, in[ 7] + 0x676f02d9, 14);
927 MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
928
929 MD5STEP(F3, a, b, c, d, in[ 5] + 0xfffa3942, 4);
930 MD5STEP(F3, d, a, b, c, in[ 8] + 0x8771f681, 11);
931 MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
932 MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
933 MD5STEP(F3, a, b, c, d, in[ 1] + 0xa4beea44, 4);
934 MD5STEP(F3, d, a, b, c, in[ 4] + 0x4bdecfa9, 11);
935 MD5STEP(F3, c, d, a, b, in[ 7] + 0xf6bb4b60, 16);
936 MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
937 MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
938 MD5STEP(F3, d, a, b, c, in[ 0] + 0xeaa127fa, 11);
939 MD5STEP(F3, c, d, a, b, in[ 3] + 0xd4ef3085, 16);
940 MD5STEP(F3, b, c, d, a, in[ 6] + 0x04881d05, 23);
941 MD5STEP(F3, a, b, c, d, in[ 9] + 0xd9d4d039, 4);
942 MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
943 MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
944 MD5STEP(F3, b, c, d, a, in[ 2] + 0xc4ac5665, 23);
945
946 MD5STEP(F4, a, b, c, d, in[ 0] + 0xf4292244, 6);
947 MD5STEP(F4, d, a, b, c, in[ 7] + 0x432aff97, 10);
948 MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
949 MD5STEP(F4, b, c, d, a, in[ 5] + 0xfc93a039, 21);
950 MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
951 MD5STEP(F4, d, a, b, c, in[ 3] + 0x8f0ccc92, 10);
952 MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
953 MD5STEP(F4, b, c, d, a, in[ 1] + 0x85845dd1, 21);
954 MD5STEP(F4, a, b, c, d, in[ 8] + 0x6fa87e4f, 6);
955 MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
956 MD5STEP(F4, c, d, a, b, in[ 6] + 0xa3014314, 15);
957 MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
958 MD5STEP(F4, a, b, c, d, in[ 4] + 0xf7537e82, 6);
959 MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
960 MD5STEP(F4, c, d, a, b, in[ 2] + 0x2ad7d2bb, 15);
961 MD5STEP(F4, b, c, d, a, in[ 9] + 0xeb86d391, 21);
962
963 buf[0] += a;
964 buf[1] += b;
965 buf[2] += c;
966 buf[3] += d;
967}
968
969
970#ifdef RT_BIG_ENDIAN
971/*
972 * Note: this code is harmless on little-endian machines.
973 */
974static void rtMd5ByteReverse(uint32_t *buf, unsigned int longs)
975{
976 uint32_t t;
977 do
978 {
979 t = *buf;
980 t = RT_LE2H_U32(t);
981 *buf = t;
982 buf++;
983 } while (--longs);
984}
985#else /* little endian - do nothing */
986# define rtMd5ByteReverse(buf, len) do { /* Nothing */ } while (0)
987#endif
988
989
990/*
991 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
992 * initialization constants.
993 */
994static void dxbcRTMd5Init(PRTMD5CONTEXT pCtx)
995{
996 pCtx->AltPrivate.buf[0] = 0x67452301;
997 pCtx->AltPrivate.buf[1] = 0xefcdab89;
998 pCtx->AltPrivate.buf[2] = 0x98badcfe;
999 pCtx->AltPrivate.buf[3] = 0x10325476;
1000
1001 pCtx->AltPrivate.bits[0] = 0;
1002 pCtx->AltPrivate.bits[1] = 0;
1003}
1004
1005
1006/*
1007 * Update context to reflect the concatenation of another buffer full
1008 * of bytes.
1009 */
1010/** @todo Optimize this, because len is always a multiple of 64. */
1011static void dxbcRTMd5Update(PRTMD5CONTEXT pCtx, const void *pvBuf, size_t len)
1012{
1013 const uint8_t *buf = (const uint8_t *)pvBuf;
1014 uint32_t t;
1015
1016 /* Update bitcount */
1017 t = pCtx->AltPrivate.bits[0];
1018 if ((pCtx->AltPrivate.bits[0] = t + ((uint32_t) len << 3)) < t)
1019 pCtx->AltPrivate.bits[1]++; /* Carry from low to high */
1020 pCtx->AltPrivate.bits[1] += (uint32_t)(len >> 29);
1021
1022 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
1023
1024 /* Handle any leading odd-sized chunks */
1025 if (t)
1026 {
1027 uint8_t *p = (uint8_t *) pCtx->AltPrivate.in + t;
1028
1029 t = 64 - t;
1030 if (len < t)
1031 {
1032 memcpy(p, buf, len);
1033 return;
1034 }
1035 memcpy(p, buf, t);
1036 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1037 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1038 buf += t;
1039 len -= t;
1040 }
1041
1042 /* Process data in 64-byte chunks */
1043#ifndef RT_BIG_ENDIAN
1044 if (!((uintptr_t)buf & 0x3))
1045 {
1046 while (len >= 64) {
1047 rtMd5Transform(pCtx->AltPrivate.buf, (uint32_t const *)buf);
1048 buf += 64;
1049 len -= 64;
1050 }
1051 }
1052 else
1053#endif
1054 {
1055 while (len >= 64) {
1056 memcpy(pCtx->AltPrivate.in, buf, 64);
1057 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1058 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1059 buf += 64;
1060 len -= 64;
1061 }
1062 }
1063
1064 /* Handle any remaining bytes of data */
1065 memcpy(pCtx->AltPrivate.in, buf, len);
1066}
1067
1068
1069static void dxbcHash(void const *pvData, uint32_t cbData, uint8_t pabDigest[RTMD5HASHSIZE])
1070{
1071 size_t const kBlockSize = 64;
1072 uint8_t au8BlockBuffer[kBlockSize];
1073
1074 static uint8_t const s_au8Padding[kBlockSize] =
1075 {
1076 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1077 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1078 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1079 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1080 };
1081
1082 RTMD5CONTEXT Ctx;
1083 PRTMD5CONTEXT const pCtx = &Ctx;
1084 dxbcRTMd5Init(pCtx);
1085
1086 uint8_t const *pu8Data = (uint8_t *)pvData;
1087 size_t cbRemaining = cbData;
1088
1089 size_t const cbCompleteBlocks = cbData & ~ (kBlockSize - 1);
1090 dxbcRTMd5Update(pCtx, pu8Data, cbCompleteBlocks);
1091 pu8Data += cbCompleteBlocks;
1092 cbRemaining -= cbCompleteBlocks;
1093
1094 /* Custom padding. */
1095 if (cbRemaining >= kBlockSize - 2 * sizeof(uint32_t))
1096 {
1097 /* Two additional blocks. */
1098 memcpy(&au8BlockBuffer[0], pu8Data, cbRemaining);
1099 memcpy(&au8BlockBuffer[cbRemaining], s_au8Padding, kBlockSize - cbRemaining);
1100 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1101
1102 memset(&au8BlockBuffer[sizeof(uint32_t)], 0, kBlockSize - 2 * sizeof(uint32_t));
1103 }
1104 else
1105 {
1106 /* One additional block. */
1107 memcpy(&au8BlockBuffer[sizeof(uint32_t)], pu8Data, cbRemaining);
1108 memcpy(&au8BlockBuffer[sizeof(uint32_t) + cbRemaining], s_au8Padding, kBlockSize - cbRemaining - 2 * sizeof(uint32_t));
1109 }
1110
1111 /* Set the first and last dwords of the last block. */
1112 *(uint32_t *)&au8BlockBuffer[0] = cbData << 3;
1113 *(uint32_t *)&au8BlockBuffer[kBlockSize - sizeof(uint32_t)] = (cbData << 1) | 1;
1114 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1115
1116 AssertCompile(sizeof(pCtx->AltPrivate.buf) == RTMD5HASHSIZE);
1117 memcpy(pabDigest, pCtx->AltPrivate.buf, RTMD5HASHSIZE);
1118}
1119
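/*
 * A minimal sketch of the intended use of dxbcHash: the DXBC checksum covers the
 * container contents that follow the au8Hash field (i.e. starting at u32Version) and
 * is stored back into the header.  dxbcExampleUpdateHash is a hypothetical caller,
 * shown only to illustrate the shape of the call; it assumes pHdr points to a complete
 * container of cbTotal bytes.
 */
#if 0
static void dxbcExampleUpdateHash(DXBCHeader *pHdr)
{
    uint32_t const offHashed = RT_UOFFSETOF(DXBCHeader, u32Version);
    dxbcHash((uint8_t *)pHdr + offHashed, pHdr->cbTotal - offHashed, pHdr->au8Hash);
}
#endif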
1120
1121/*
1122 *
1123 * Shader token reader.
1124 *
1125 */
1126
1127typedef struct DXBCTokenReader
1128{
1129 uint32_t const *pToken; /* Next token to read. */
1130 uint32_t cToken; /* How many tokens total. */
1131 uint32_t cRemainingToken; /* How many tokens remain. */
1132} DXBCTokenReader;
1133
1134
1135#ifdef LOG_ENABLED
1136DECLINLINE(uint32_t) dxbcTokenReaderByteOffset(DXBCTokenReader *r)
1137{
1138 return (r->cToken - r->cRemainingToken) * 4;
1139}
1140#endif
1141
1142
1143#if 0 // Unused for now
1144DECLINLINE(uint32_t) dxbcTokenReaderRemaining(DXBCTokenReader *r)
1145{
1146 return r->cRemainingToken;
1147}
1148#endif
1149
1150
1151DECLINLINE(uint32_t const *) dxbcTokenReaderPtr(DXBCTokenReader *r)
1152{
1153 return r->pToken;
1154}
1155
1156
1157DECLINLINE(bool) dxbcTokenReaderCanRead(DXBCTokenReader *r, uint32_t cToken)
1158{
1159 return cToken <= r->cRemainingToken;
1160}
1161
1162
1163DECLINLINE(void) dxbcTokenReaderSkip(DXBCTokenReader *r, uint32_t cToken)
1164{
1165 AssertReturnVoid(r->cRemainingToken >= cToken);
1166 r->cRemainingToken -= cToken;
1167 r->pToken += cToken;
1168}
1169
1170
1171DECLINLINE(uint32_t) dxbcTokenReaderRead32(DXBCTokenReader *r)
1172{
1173 AssertReturn(r->cRemainingToken, 0);
1174 --r->cRemainingToken;
1175 return *(r->pToken++);
1176}
1177
1178
1179DECLINLINE(uint64_t) dxbcTokenReaderRead64(DXBCTokenReader *r)
1180{
1181 uint64_t const u64Low = dxbcTokenReaderRead32(r);
1182 uint64_t const u64High = dxbcTokenReaderRead32(r);
1183 return u64Low + (u64High << 32);
1184}
1185
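/*
 * A minimal sketch of driving the token reader above over a raw VGPU10 dword stream.
 * dxbcExampleReadTokens is hypothetical; paToken/cToken stand for an arbitrary
 * caller-provided array.  The Read helpers assert and return 0 rather than reading
 * past the end.
 */
#if 0
static void dxbcExampleReadTokens(uint32_t const *paToken, uint32_t cToken)
{
    DXBCTokenReader r;
    r.pToken          = paToken;
    r.cToken          = cToken;
    r.cRemainingToken = cToken;

    if (dxbcTokenReaderCanRead(&r, 3))
    {
        uint32_t const u32First = dxbcTokenReaderRead32(&r); /* One dword. */
        uint64_t const u64Pair  = dxbcTokenReaderRead64(&r); /* Low dword first, then high. */
        RT_NOREF(u32First, u64Pair);
    }
}
#endif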
1186
1187/*
1188 *
1189 * Byte writer.
1190 *
1191 */
1192
1193typedef struct DXBCByteWriter
1194{
1195 uint8_t *pu8ByteCodeBegin; /* First byte of the buffer. */
1196 uint8_t *pu8ByteCodePtr; /* Next byte to be written. */
1197 uint32_t cbAllocated; /* How many bytes allocated in the buffer. */
1198 uint32_t cbRemaining; /* How many bytes remain in the buffer. */
1199 uint32_t cbWritten; /* Offset of first never written byte.
1200 * Since the writer allows to jump in the buffer, this field tracks
1201 * the upper boundary of the written data.
1202 */
1203 int32_t rc;
1204} DXBCByteWriter;
1205
1206
1207typedef struct DXBCByteWriterState
1208{
1209 uint32_t off; /* Offset of the next free byte. */
1210} DXBCByteWriterState;
1211
1212
1213DECLINLINE(void *) dxbcByteWriterPtr(DXBCByteWriter *w)
1214{
1215 return w->pu8ByteCodePtr;
1216}
1217
1218
1219DECLINLINE(uint32_t) dxbcByteWriterSize(DXBCByteWriter *w)
1220{
1221 return (uint32_t)(w->pu8ByteCodePtr - w->pu8ByteCodeBegin);
1222}
1223
1224
1225static bool dxbcByteWriterRealloc(DXBCByteWriter *w, uint32_t cbNew)
1226{
1227 void *pvNew = RTMemAllocZ(cbNew);
1228 if (!pvNew)
1229 {
1230 w->rc = VERR_NO_MEMORY;
1231 return false;
1232 }
1233
1234 uint32_t const cbCurrent = dxbcByteWriterSize(w);
1235 memcpy(pvNew, w->pu8ByteCodeBegin, cbCurrent);
1236 RTMemFree(w->pu8ByteCodeBegin);
1237
1238 w->pu8ByteCodeBegin = (uint8_t *)pvNew;
1239 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + cbCurrent;
1240 w->cbAllocated = cbNew;
1241 w->cbRemaining = cbNew - cbCurrent;
1242 return true;
1243}
1244
1245
1246DECLINLINE(bool) dxbcByteWriterSetOffset(DXBCByteWriter *w, uint32_t off, DXBCByteWriterState *pSavedWriterState)
1247{
1248 if (RT_FAILURE(w->rc))
1249 return false;
1250
1251 uint32_t const cbNew = RT_ALIGN_32(off, 1024);
1252 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1253 AssertReturnStmt(off < cbMax && cbNew < cbMax, w->rc = VERR_INVALID_PARAMETER, false);
1254
1255 if (cbNew > w->cbAllocated)
1256 {
1257 if (!dxbcByteWriterRealloc(w, cbNew))
1258 return false;
1259 }
1260
1261 pSavedWriterState->off = dxbcByteWriterSize(w);
1262
1263 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + off;
1264 w->cbRemaining = w->cbAllocated - off;
1265 return true;
1266}
1267
1268
1269DECLINLINE(void) dxbcByteWriterRestore(DXBCByteWriter *w, DXBCByteWriterState *pSavedWriterState)
1270{
1271 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + pSavedWriterState->off;
1272 w->cbRemaining = w->cbAllocated - pSavedWriterState->off;
1273}
1274
1275
1276DECLINLINE(void) dxbcByteWriterCommit(DXBCByteWriter *w, uint32_t cbCommit)
1277{
1278 if (RT_FAILURE(w->rc))
1279 return;
1280
1281 Assert(cbCommit < w->cbRemaining);
1282 cbCommit = RT_MIN(cbCommit, w->cbRemaining);
1283 w->pu8ByteCodePtr += cbCommit;
1284 w->cbRemaining -= cbCommit;
1285 w->cbWritten = RT_MAX(w->cbWritten, w->cbAllocated - w->cbRemaining);
1286}
1287
1288
1289DECLINLINE(bool) dxbcByteWriterCanWrite(DXBCByteWriter *w, uint32_t cbMore)
1290{
1291 if (RT_FAILURE(w->rc))
1292 return false;
1293
1294 if (cbMore <= w->cbRemaining)
1295 return true;
1296
1297 /* Do not allow to allocate more than 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES */
1298 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1299 AssertReturnStmt(cbMore < cbMax && RT_ALIGN_32(cbMore, 4096) <= cbMax - w->cbAllocated, w->rc = VERR_INVALID_PARAMETER, false);
1300
1301 uint32_t cbNew = w->cbAllocated + RT_ALIGN_32(cbMore, 4096);
1302 return dxbcByteWriterRealloc(w, cbNew);
1303}
1304
1305
1306DECLINLINE(bool) dxbcByteWriterAddTokens(DXBCByteWriter *w, uint32_t const *paToken, uint32_t cToken)
1307{
1308 uint32_t const cbWrite = cToken * sizeof(uint32_t);
1309 if (dxbcByteWriterCanWrite(w, cbWrite))
1310 {
1311 memcpy(dxbcByteWriterPtr(w), paToken, cbWrite);
1312 dxbcByteWriterCommit(w, cbWrite);
1313 return true;
1314 }
1315
1316 AssertFailed();
1317 return false;
1318}
1319
1320
1321DECLINLINE(bool) dxbcByteWriterInit(DXBCByteWriter *w, uint32_t cbInitial)
1322{
1323 RT_ZERO(*w);
1324 return dxbcByteWriterCanWrite(w, cbInitial);
1325}
1326
1327
1328DECLINLINE(void) dxbcByteWriterReset(DXBCByteWriter *w)
1329{
1330 RTMemFree(w->pu8ByteCodeBegin);
1331 RT_ZERO(*w);
1332}
1333
1334
1335DECLINLINE(void) dxbcByteWriterFetchData(DXBCByteWriter *w, void **ppv, uint32_t *pcb)
1336{
1337 *ppv = w->pu8ByteCodeBegin;
1338 *pcb = w->cbWritten;
1339
1340 w->pu8ByteCodeBegin = NULL;
1341 dxbcByteWriterReset(w);
1342}
1343
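/*
 * A minimal sketch of the byte writer's lifecycle: initialize, append tokens, optionally
 * jump back with dxbcByteWriterSetOffset to patch a value written earlier (restoring the
 * position afterwards), then take ownership of the buffer with dxbcByteWriterFetchData
 * and free it with RTMemFree.  dxbcExampleWriter is hypothetical and the token values
 * are placeholders.
 */
#if 0
static int dxbcExampleWriter(void)
{
    DXBCByteWriter w;
    if (!dxbcByteWriterInit(&w, 4096))
        return VERR_NO_MEMORY;

    uint32_t aToken[2] = { 0 /* placeholder, patched below */, 0xdeadbeef };
    dxbcByteWriterAddTokens(&w, aToken, RT_ELEMENTS(aToken));

    /* Jump back to offset 0, overwrite the first dword, and restore the position. */
    DXBCByteWriterState SavedState;
    if (dxbcByteWriterSetOffset(&w, 0, &SavedState))
    {
        uint32_t const u32Value = w.cbWritten;     /* E.g. the total size written so far. */
        dxbcByteWriterAddTokens(&w, &u32Value, 1);
        dxbcByteWriterRestore(&w, &SavedState);
    }

    if (RT_FAILURE(w.rc))
    {
        int const rc = w.rc;
        dxbcByteWriterReset(&w);
        return rc;
    }

    void *pv = NULL;
    uint32_t cb = 0;
    dxbcByteWriterFetchData(&w, &pv, &cb);         /* Transfers ownership and resets the writer. */
    RTMemFree(pv);
    return VINF_SUCCESS;
}
#endif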
1344
1345/*
1346 *
1347 * VGPU10 shader parser.
1348 *
1349 */
1350
1351/* Parse an instruction operand. */
1352static int dxbcParseOperand(DXBCTokenReader *r, VGPUOperand *paOperand, uint32_t *pcOperandRemain)
1353{
1354 ASSERT_GUEST_RETURN(*pcOperandRemain > 0, VERR_NOT_SUPPORTED);
1355
1356 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1357
1358 paOperand->paOperandToken = dxbcTokenReaderPtr(r);
1359 paOperand->cOperandToken = 0;
1360
1361 VGPU10OperandToken0 operand0;
1362 operand0.value = dxbcTokenReaderRead32(r);
1363
1364 Log6((" %s(%d) %s(%d) %s(%d) %s(%d)\n",
1365 dxbcOperandNumComponentsToString(operand0.numComponents), operand0.numComponents,
1366 dxbcOperandComponentModeToString(operand0.selectionMode), operand0.selectionMode,
1367 dxbcOperandTypeToString(operand0.operandType), operand0.operandType,
1368 dxbcOperandIndexDimensionToString(operand0.indexDimension), operand0.indexDimension));
1369
1370 ASSERT_GUEST_RETURN(operand0.numComponents <= VGPU10_OPERAND_4_COMPONENT, VERR_INVALID_PARAMETER);
1371 if ( operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32
1372 && operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE64)
1373 {
1374 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1375 {
1376 ASSERT_GUEST_RETURN(operand0.selectionMode <= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE, VERR_INVALID_PARAMETER);
1377 switch (operand0.selectionMode)
1378 {
1379 case VGPU10_OPERAND_4_COMPONENT_MASK_MODE:
1380 Log6((" Mask %#x\n", operand0.mask));
1381 break;
1382 case VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE:
1383 Log6((" Swizzle %s(%d) %s(%d) %s(%d) %s(%d)\n",
1384 dxbcOperandComponentNameToString(operand0.swizzleX), operand0.swizzleX,
1385 dxbcOperandComponentNameToString(operand0.swizzleY), operand0.swizzleY,
1386 dxbcOperandComponentNameToString(operand0.swizzleZ), operand0.swizzleZ,
1387 dxbcOperandComponentNameToString(operand0.swizzleW), operand0.swizzleW));
1388 break;
1389 case VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE:
1390 Log6((" Select %s(%d)\n",
1391 dxbcOperandComponentNameToString(operand0.selectMask), operand0.selectMask));
1392 break;
1393 default: /* Never happens. */
1394 break;
1395 }
1396 }
1397 }
1398
1399 if (operand0.extended)
1400 {
1401 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1402
1403 VGPU10OperandToken1 operand1;
1404 operand1.value = dxbcTokenReaderRead32(r);
1405 }
1406
1407 ASSERT_GUEST_RETURN(operand0.operandType < VGPU10_NUM_OPERANDS, VERR_INVALID_PARAMETER);
1408
1409 if ( operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32
1410 || operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE64)
1411 {
1412 uint32_t cComponent = 0;
1413 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1414 cComponent = 4;
1415 else if (operand0.numComponents == VGPU10_OPERAND_1_COMPONENT)
1416 cComponent = 1;
1417
1418 for (uint32_t i = 0; i < cComponent; ++i)
1419 {
1420 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1421 paOperand->aImm[i] = dxbcTokenReaderRead32(r);
1422 }
1423 }
1424
1425 paOperand->numComponents = operand0.numComponents;
1426 paOperand->selectionMode = operand0.selectionMode;
1427 paOperand->mask = operand0.mask;
1428 paOperand->operandType = operand0.operandType;
1429 paOperand->indexDimension = operand0.indexDimension;
1430
1431 int rc = VINF_SUCCESS;
1432 /* 'indexDimension' tells the number of indices. 'i' is the array index, i.e. i = 0 for 1D, etc. */
1433 for (uint32_t i = 0; i < operand0.indexDimension; ++i)
1434 {
1435 if (i == 0) /* VGPU10_OPERAND_INDEX_1D */
1436 paOperand->aOperandIndex[i].indexRepresentation = operand0.index0Representation;
1437 else if (i == 1) /* VGPU10_OPERAND_INDEX_2D */
1438 paOperand->aOperandIndex[i].indexRepresentation = operand0.index1Representation;
1439 else /* VGPU10_OPERAND_INDEX_3D */
1440 continue; /* Skip because it is "rarely if ever used" and is not supported by VGPU10. */
1441
1442 uint32_t const indexRepresentation = paOperand->aOperandIndex[i].indexRepresentation;
1443 switch (indexRepresentation)
1444 {
1445 case VGPU10_OPERAND_INDEX_IMMEDIATE32:
1446 {
1447 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1448 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1449 break;
1450 }
1451 case VGPU10_OPERAND_INDEX_IMMEDIATE64:
1452 {
1453 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1454 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1455 break;
1456 }
1457 case VGPU10_OPERAND_INDEX_RELATIVE:
1458 {
1459 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1460 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1461 Log6((" [operand index %d] parsing relative\n", i));
1462 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1463 break;
1464 }
1465 case VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1466 {
1467 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1468 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1469 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1470 Log6((" [operand index %d] parsing relative\n", i));
1471 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1472 break;
1473 }
1474 case VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE:
1475 {
1476 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1477 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1478 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1479 Log6((" [operand index %d] parsing relative\n", i));
1480 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1481 break;
1482 }
1483 default:
1484 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1485 }
1486 Log6((" [operand index %d] %s(%d): %#llx%s\n",
1487 i, dxbcOperandIndexRepresentationToString(indexRepresentation), indexRepresentation,
1488 paOperand->aOperandIndex[i].iOperandImmediate, paOperand->aOperandIndex[i].pOperandRelative ? " + relative" : ""));
1489 if (RT_FAILURE(rc))
1490 break;
1491 }
1492
1493 paOperand->cOperandToken = dxbcTokenReaderPtr(r) - paOperand->paOperandToken;
1494
1495 *pcOperandRemain -= 1;
1496 return rc;
1497}
1498
1499
1500/* Parse an instruction. */
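/* Reads the opcode token, any extended opcode tokens, opcode specific tokens which precede
 * the operands (e.g. the function index of interface_call), the operands themselves, and
 * opcode specific tokens which follow the operands (e.g. the system name of dcl_input_siv).
 * CUSTOMDATA and VMWARE opcodes do not follow this common layout and are handled separately.
 */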
1501static int dxbcParseOpcode(DXBCTokenReader *r, VGPUOpcode *pOpcode)
1502{
1503 RT_ZERO(*pOpcode);
1504 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1505
1506 pOpcode->paOpcodeToken = dxbcTokenReaderPtr(r);
1507
1508 VGPU10OpcodeToken0 opcode;
1509 opcode.value = dxbcTokenReaderRead32(r);
1510
1511 pOpcode->opcodeType = opcode.opcodeType;
1512 ASSERT_GUEST_RETURN(pOpcode->opcodeType < VGPU10_NUM_OPCODES, VERR_INVALID_PARAMETER);
1513
1514 Log6(("[%#x] %s length %d\n",
1515 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), opcode.instructionLength));
1516
1517 uint32_t const cOperand = g_aOpcodeInfo[pOpcode->opcodeType].cOperand;
1518 if (cOperand != UINT32_MAX)
1519 {
1520 ASSERT_GUEST_RETURN(cOperand < RT_ELEMENTS(pOpcode->aIdxOperand), VERR_INVALID_PARAMETER);
1521
1522 pOpcode->cOpcodeToken = opcode.instructionLength;
1523 uint32_t cOpcode = 1; /* Opcode token + extended opcode tokens. */
1524 if (opcode.extended)
1525 {
1526 if ( pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_BODY
1527 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_TABLE
1528 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_INTERFACE
1529 || pOpcode->opcodeType == VGPU10_OPCODE_INTERFACE_CALL
1530 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_THREAD_GROUP)
1531 {
1532 /* "next DWORD contains ... the actual instruction length in DWORD since it may not fit into 7 bits" */
1533 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1534 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1535 ++cOpcode;
1536 }
1537 else
1538 {
1539 VGPU10OpcodeToken1 opcode1;
1540 do
1541 {
1542 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1543 opcode1.value = dxbcTokenReaderRead32(r);
1544 ++cOpcode;
1545 ASSERT_GUEST( opcode1.opcodeType == VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS
1546 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM
1547 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE);
1548 } while (opcode1.extended);
1549 }
1550 }
1551
1552 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken >= 1 && pOpcode->cOpcodeToken < 256, VERR_INVALID_PARAMETER);
1553 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - cOpcode), VERR_INVALID_PARAMETER);
1554
1555#ifdef LOG_ENABLED
1556 Log6((" %08X", opcode.value));
1557 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1558 Log6((" %08X", r->pToken[i - 1]));
1559 Log6(("\n"));
1560
1561 if (pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1562 Log6((" %s\n",
1563 dxbcResourceDimensionToString(opcode.resourceDimension)));
1564 else
1565 Log6((" %s\n",
1566 dxbcInterpolationModeToString(opcode.interpolationMode)));
1567#endif
1568 /* Additional tokens before operands. */
1569 switch (pOpcode->opcodeType)
1570 {
1571 case VGPU10_OPCODE_INTERFACE_CALL:
1572 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1573 dxbcTokenReaderSkip(r, 1); /* Function index */
1574 break;
1575
1576 default:
1577 break;
1578 }
1579
1580 /* Operands. */
1581 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1582 for (uint32_t i = 0; i < cOperand; ++i)
1583 {
1584 Log6((" [operand %d]\n", i));
1585 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1586 pOpcode->aIdxOperand[i] = idxOperand;
1587 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1588 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1589 }
1590
1591 pOpcode->cOperand = cOperand;
1592
1593 /* Additional tokens after operands. */
1594 switch (pOpcode->opcodeType)
1595 {
1596 case VGPU10_OPCODE_DCL_INPUT_SIV:
1597 case VGPU10_OPCODE_DCL_INPUT_SGV:
1598 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
1599 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
1600 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1601 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1602 {
1603 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1604
1605 VGPU10NameToken name;
1606 name.value = dxbcTokenReaderRead32(r);
1607 Log6((" %s(%d)\n",
1608 dxbcSystemNameToString(name.name), name.name));
1609 pOpcode->semanticName = name.name;
1610 break;
1611 }
1612 case VGPU10_OPCODE_DCL_RESOURCE:
1613 {
1614 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1615 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1616 break;
1617 }
1618 case VGPU10_OPCODE_DCL_TEMPS:
1619 {
1620 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1621 dxbcTokenReaderSkip(r, 1); /* number of temps */
1622 break;
1623 }
1624 case VGPU10_OPCODE_DCL_INDEXABLE_TEMP:
1625 {
1626 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1627 dxbcTokenReaderSkip(r, 3); /* register index; number of registers; number of components */
1628 break;
1629 }
1630 case VGPU10_OPCODE_DCL_INDEX_RANGE:
1631 {
1632 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1633 dxbcTokenReaderSkip(r, 1); /* count of registers */
1634 break;
1635 }
1636 case VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1637 {
1638 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1639             dxbcTokenReaderSkip(r, 1); /* maximum number of output vertices */
1640 break;
1641 }
1642 case VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT:
1643 {
1644 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1645 dxbcTokenReaderSkip(r, 1); /* number of instances */
1646 break;
1647 }
1648 case VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR:
1649 {
1650 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1651 dxbcTokenReaderSkip(r, 1); /* maximum TessFactor */
1652 break;
1653 }
1654 case VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1655 case VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1656 {
1657 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1658 dxbcTokenReaderSkip(r, 1); /* number of instances of the current fork/join phase program to execute */
1659 break;
1660 }
1661 case VGPU10_OPCODE_DCL_THREAD_GROUP:
1662 {
1663 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1664 dxbcTokenReaderSkip(r, 3); /* Thread Group dimensions as UINT32: x, y, z */
1665 break;
1666 }
1667 case VGPU10_OPCODE_DCL_UAV_TYPED:
1668 {
1669 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1670 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1671 break;
1672 }
1673 case VGPU10_OPCODE_DCL_UAV_STRUCTURED:
1674 {
1675 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1676 dxbcTokenReaderSkip(r, 1); /* byte stride */
1677 break;
1678 }
1679 case VGPU10_OPCODE_DCL_TGSM_RAW:
1680 {
1681 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1682 dxbcTokenReaderSkip(r, 1); /* element count */
1683 break;
1684 }
1685 case VGPU10_OPCODE_DCL_TGSM_STRUCTURED:
1686 {
1687 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1688 dxbcTokenReaderSkip(r, 2); /* struct byte stride; struct count */
1689 break;
1690 }
1691 case VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED:
1692 {
1693 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1694 dxbcTokenReaderSkip(r, 1); /* struct byte stride */
1695 break;
1696 }
1697 default:
1698 break;
1699 }
1700 }
1701 else
1702 {
1703 /* Special opcodes. */
1704 if (pOpcode->opcodeType == VGPU10_OPCODE_CUSTOMDATA)
1705 {
1706 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1707 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1708
1709 if (pOpcode->cOpcodeToken < 2)
1710 pOpcode->cOpcodeToken = 2;
1711 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 2), VERR_INVALID_PARAMETER);
1712
1713#ifdef LOG_ENABLED
1714 Log6((" %08X", opcode.value));
1715 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1716 Log6((" %08X", r->pToken[i - 1]));
1717 Log6(("\n"));
1718
1719 Log6((" %s\n",
1720 dxbcCustomDataClassToString(opcode.customDataClass)));
1721#endif
1722 dxbcTokenReaderSkip(r, pOpcode->cOpcodeToken - 2);
1723 }
1724 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1725 {
1726 pOpcode->cOpcodeToken = opcode.instructionLength;
1727 pOpcode->opcodeSubtype = opcode.vmwareOpcodeType;
1728
1729#ifdef LOG_ENABLED
1730 Log6((" %08X", opcode.value));
1731 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1732 Log6((" %08X", r->pToken[i - 1]));
1733 Log6(("\n"));
1734
1735 Log6((" %s(%d)\n",
1736 dxbcVmwareOpcodeTypeToString(opcode.vmwareOpcodeType), opcode.vmwareOpcodeType));
1737#endif
1738
1739 if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_IDIV)
1740 {
1741 /* Integer divide. */
1742            pOpcode->cOperand = 4; /* dstQuot, dstRem, src0, src1. */
1743
1744 /* Operands. */
1745 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1746 for (uint32_t i = 0; i < pOpcode->cOperand; ++i)
1747 {
1748 Log6((" [operand %d]\n", i));
1749 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1750 pOpcode->aIdxOperand[i] = idxOperand;
1751 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1752 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1753 }
1754 }
1755 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DFRC)
1756 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DRSQ)
1757 else
1758 {
1759 /** @todo implement */
1760 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1761 }
1762 }
1763 else
1764 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1765
1766 // pOpcode->cOperand = 0;
1767 }
1768
1769 return VINF_SUCCESS;
1770}
1771
1772
1773typedef struct DXBCOUTPUTCTX
1774{
1775 VGPU10ProgramToken programToken;
1776 uint32_t cToken; /* Number of tokens in the original shader code. */
1777
1778 uint32_t offSubroutine; /* Current offset where to write subroutines. */
1779} DXBCOUTPUTCTX;
1780
1781
1782static void dxbcOutputInit(DXBCOUTPUTCTX *pOutctx, VGPU10ProgramToken const *pProgramToken, uint32_t cToken)
1783{
1784 RT_ZERO(*pOutctx);
1785 pOutctx->programToken = *pProgramToken;
1786 pOutctx->cToken = cToken;
1787
1788 pOutctx->offSubroutine = cToken * 4;
1789}
1790
1791
1792static int dxbcEmitVmwareIDIV(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1793{
1794 /* Insert a call and append a subroutine. */
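    /* In place of the original instruction a 'call' of a label is written (3 tokens), padded
     * with 'nop's to a total of 8 tokens. The subroutine is appended at pOutctx->offSubroutine,
     * past the end of the original token stream, and consists of 'label', the divide
     * (currently 'udiv' with the original IDIV operands, see below) and 'ret'.
     */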
1795 VGPU10OpcodeToken0 opcode;
1796 VGPU10OperandToken0 operand;
1797
1798 uint32_t const label = (pOutctx->offSubroutine - dxbcByteWriterSize(w)) / 4;
1799
1800 /*
1801 * Call
1802 */
1803 opcode.value = 0;
1804 opcode.opcodeType = VGPU10_OPCODE_CALL;
1805 opcode.instructionLength = 3;
1806 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1807
1808 operand.value = 0;
1809 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1810 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1811 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1812 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1813 dxbcByteWriterAddTokens(w, &operand.value, 1);
1814
1815 dxbcByteWriterAddTokens(w, &label, 1);
1816
1817 opcode.value = 0;
1818 opcode.opcodeType = VGPU10_OPCODE_NOP;
1819 opcode.instructionLength = 1;
1820 for (int i = 0; i < 8 - 3; ++i)
1821 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1822
1823 /*
1824 * Subroutine.
1825 */
1826 DXBCByteWriterState savedWriterState;
1827 if (!dxbcByteWriterSetOffset(w, pOutctx->offSubroutine, &savedWriterState))
1828 return w->rc;
1829
1830 /* label */
1831 opcode.value = 0;
1832 opcode.opcodeType = VGPU10_OPCODE_LABEL;
1833 opcode.instructionLength = 3;
1834 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1835
1836 operand.value = 0;
1837 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1838 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1839 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1840 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1841 dxbcByteWriterAddTokens(w, &operand.value, 1);
1842 dxbcByteWriterAddTokens(w, &label, 1);
1843
1844 /* Just output UDIV for now. */
1845 opcode.value = 0;
1846 opcode.opcodeType = VGPU10_OPCODE_UDIV;
1847 opcode.instructionLength = pOpcode->cOpcodeToken;
1848 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1849 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], pOpcode->cOpcodeToken - 1);
1850
1851 /* ret */
1852 opcode.value = 0;
1853 opcode.opcodeType = VGPU10_OPCODE_RET;
1854 opcode.instructionLength = 1;
1855 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1856
1857 pOutctx->offSubroutine = dxbcByteWriterSize(w);
1858 dxbcByteWriterRestore(w, &savedWriterState);
1859
1860 return w->rc;
1861}
1862
1863
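/* Emit one parsed instruction to the output stream, applying workarounds where needed:
 * pixel shader dcl_resource declarations with a bogus dimension are patched (see below),
 * and VMWARE specific opcodes are translated. Anything else is copied unmodified.
 */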
1864static int dxbcOutputOpcode(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1865{
1866#ifdef DEBUG
1867 void *pvBegin = dxbcByteWriterPtr(w);
1868#endif
1869
1870 if ( pOutctx->programToken.programType == VGPU10_PIXEL_SHADER
1871 && pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1872 {
1873 /** @todo This is a workaround. */
1874        /* Sometimes the guest (Mesa) creates a shader with an uninitialized resource dimension.
1875         * Use texture 2d because that is what a pixel shader normally uses.
1876 */
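        /* The check below treats a declared BUFFER dimension as the uninitialized case and re-emits
         * the declaration with the dimension forced to TEXTURE2D: the patched opcode token, the two
         * original operand tokens, and a return type token of 0x5555 (FLOAT in each of the four
         * component fields).
         */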
1877 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken == 4, VERR_INVALID_PARAMETER);
1878
1879 VGPU10OpcodeToken0 opcode;
1880 opcode.value = pOpcode->paOpcodeToken[0];
1881 if (opcode.resourceDimension == VGPU10_RESOURCE_DIMENSION_BUFFER)
1882 {
1883 opcode.resourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
1884 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1885 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], 2);
1886 uint32_t const returnType = 0x5555; /* float */
1887 dxbcByteWriterAddTokens(w, &returnType, 1);
1888 return VINF_SUCCESS;
1889 }
1890 }
1891 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1892 {
1893 if (pOpcode->opcodeSubtype == VGPU10_VMWARE_OPCODE_IDIV)
1894 {
1895 return dxbcEmitVmwareIDIV(pOutctx, w, pOpcode);
1896 }
1897
1898 ASSERT_GUEST_FAILED_RETURN(VERR_NOT_SUPPORTED);
1899 }
1900
1901#ifdef DEBUG
1902 /* The code above must emit either nothing or everything. */
1903 Assert((uintptr_t)pvBegin == (uintptr_t)dxbcByteWriterPtr(w));
1904#endif
1905
1906 /* Just emit the unmodified instruction. */
1907 dxbcByteWriterAddTokens(w, pOpcode->paOpcodeToken, pOpcode->cOpcodeToken);
1908 return VINF_SUCCESS;
1909}
1910
1911
1912static int dxbcOutputFinalize(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w)
1913{
1914 RT_NOREF(pOutctx, w);
1915 return VINF_SUCCESS;
1916}
1917
1918
1919/*
1920 * Parse and verify the shader byte code. Extract input and output signatures into pInfo.
1921 */
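/* A minimal usage sketch (the DXBC_STANDALONE_TEST code below uses the same parse + create
 * sequence; the variable names here are illustrative only, error handling omitted):
 *
 *   DXShaderInfo info;
 *   RT_ZERO(info);
 *   int rc = DXShaderParse(pvGuestShaderCode, cbGuestShaderCode, &info);
 *   if (RT_SUCCESS(rc))
 *   {
 *       void *pvDXBC = NULL;
 *       uint32_t cbDXBC = 0;
 *       rc = DXShaderCreateDXBC(&info, &pvDXBC, &cbDXBC);
 *       if (RT_SUCCESS(rc))
 *       {
 *           // ... use the DXBC container ...
 *           RTMemFree(pvDXBC);
 *       }
 *       DXShaderFree(&info);
 *   }
 */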
1922int DXShaderParse(void const *pvShaderCode, uint32_t cbShaderCode, DXShaderInfo *pInfo)
1923{
1924 if (pInfo)
1925 RT_ZERO(*pInfo);
1926
1927 ASSERT_GUEST_RETURN(cbShaderCode <= SVGA3D_MAX_SHADER_MEMORY_BYTES, VERR_INVALID_PARAMETER);
1928 ASSERT_GUEST_RETURN((cbShaderCode & 0x3) == 0, VERR_INVALID_PARAMETER); /* Aligned to the token size. */
1929 ASSERT_GUEST_RETURN(cbShaderCode >= 8, VERR_INVALID_PARAMETER); /* At least program and length tokens. */
1930
1931 uint32_t const *paToken = (uint32_t *)pvShaderCode;
1932
1933 VGPU10ProgramToken const *pProgramToken = (VGPU10ProgramToken *)&paToken[0];
1934 ASSERT_GUEST_RETURN( pProgramToken->majorVersion >= 4
1935 && pProgramToken->programType <= VGPU10_COMPUTE_SHADER, VERR_INVALID_PARAMETER);
1936 if (pInfo)
1937 pInfo->enmProgramType = (VGPU10_PROGRAM_TYPE)pProgramToken->programType;
1938
1939 uint32_t const cToken = paToken[1];
1940 Log6(("Shader version %d.%d type %s(%d) Length %d\n",
1941 pProgramToken->majorVersion, pProgramToken->minorVersion, dxbcShaderTypeToString(pProgramToken->programType), pProgramToken->programType, cToken));
1942    ASSERT_GUEST_RETURN(cbShaderCode / 4 >= cToken, VERR_INVALID_PARAMETER); /* Declared length must be less than or equal to the actual size. */
1943
1944 /* Write the parsed (and possibly modified) shader to a memory buffer. */
1945 DXBCByteWriter dxbcByteWriter;
1946 DXBCByteWriter *w = &dxbcByteWriter;
1947 if (!dxbcByteWriterInit(w, 4096 + cbShaderCode))
1948 return VERR_NO_MEMORY;
1949
1950 dxbcByteWriterAddTokens(w, paToken, 2);
1951
1952 DXBCTokenReader parser;
1953 RT_ZERO(parser);
1954
1955 DXBCTokenReader *r = &parser;
1956 r->pToken = &paToken[2];
1957 r->cToken = r->cRemainingToken = cToken - 2;
1958
1959 DXBCOUTPUTCTX outctx;
1960 dxbcOutputInit(&outctx, pProgramToken, cToken);
1961
1962 int rc = VINF_SUCCESS;
1963 while (dxbcTokenReaderCanRead(r, 1))
1964 {
1965 uint32_t const offOpcode = dxbcByteWriterSize(w);
1966
1967 VGPUOpcode opcode;
1968 rc = dxbcParseOpcode(r, &opcode);
1969 ASSERT_GUEST_STMT_BREAK(RT_SUCCESS(rc), rc = VERR_INVALID_PARAMETER);
1970
1971 rc = dxbcOutputOpcode(&outctx, w, &opcode);
1972 AssertRCBreak(rc);
1973
1974 if (pInfo)
1975 {
1976 /* Remember offsets of DCL_RESOURCE instructions. */
1977 if ( outctx.programToken.programType == VGPU10_PIXEL_SHADER
1978 && opcode.opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1979 {
1980 ASSERT_GUEST_STMT_BREAK(pInfo->cDclResource <= SVGA3D_DX_MAX_SRVIEWS,
1981 rc = VERR_NOT_SUPPORTED);
1982
1983 pInfo->aOffDclResource[pInfo->cDclResource++] = offOpcode;
1984 }
1985
1986 /* Fetch signatures. */
1987 SVGA3dDXSignatureEntry *pSignatureEntry = NULL;
1988 switch (opcode.opcodeType)
1989 {
1990 case VGPU10_OPCODE_DCL_INPUT:
1991 case VGPU10_OPCODE_DCL_INPUT_PS:
1992 case VGPU10_OPCODE_DCL_INPUT_SIV:
1993 ASSERT_GUEST_STMT_BREAK(pInfo->cInputSignature < RT_ELEMENTS(pInfo->aInputSignature), rc = VERR_INVALID_PARAMETER);
1994 pSignatureEntry = &pInfo->aInputSignature[pInfo->cInputSignature++];
1995 break;
1996 case VGPU10_OPCODE_DCL_OUTPUT:
1997 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1998 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1999 ASSERT_GUEST_STMT_BREAK(pInfo->cOutputSignature < RT_ELEMENTS(pInfo->aOutputSignature), rc = VERR_INVALID_PARAMETER);
2000 pSignatureEntry = &pInfo->aOutputSignature[pInfo->cOutputSignature++];
2001 break;
2002 default:
2003 break;
2004 }
2005
2006 if (RT_FAILURE(rc))
2007 break;
2008
2009 if (pSignatureEntry)
2010 {
2011 ASSERT_GUEST_STMT_BREAK( opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32
2012 || opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE64,
2013 rc = VERR_NOT_SUPPORTED);
2014
2015 uint32_t const indexDimension = opcode.aValOperand[0].indexDimension;
2016 if (indexDimension == VGPU10_OPERAND_INDEX_0D)
2017 {
2018 if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID)
2019 {
2020 pSignatureEntry->registerIndex = 0;
2021 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID;
2022 }
2023 else if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH)
2024 {
2025 /* oDepth is always last in the signature. Register index is equal to 0xFFFFFFFF. */
2026 pSignatureEntry->registerIndex = 0xFFFFFFFF;
2027 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
2028 }
2029 else if (opcode.aValOperand[0].operandType <= VGPU10_OPERAND_TYPE_SM50_MAX)
2030 {
2031 pSignatureEntry->registerIndex = 0;
2032 pSignatureEntry->semanticName = opcode.semanticName;
2033 }
2034 else
2035 ASSERT_GUEST_FAILED_STMT_BREAK(rc = VERR_NOT_SUPPORTED);
2036 }
2037 else
2038 {
2039 ASSERT_GUEST_STMT_BREAK( indexDimension == VGPU10_OPERAND_INDEX_1D
2040 || indexDimension == VGPU10_OPERAND_INDEX_2D
2041 || indexDimension == VGPU10_OPERAND_INDEX_3D,
2042 rc = VERR_NOT_SUPPORTED);
2043 /* The register index seems to be in the highest dimension. */
2044 pSignatureEntry->registerIndex = opcode.aValOperand[0].aOperandIndex[indexDimension - VGPU10_OPERAND_INDEX_1D].iOperandImmediate;
2045 pSignatureEntry->semanticName = opcode.semanticName;
2046 }
2047 pSignatureEntry->mask = opcode.aValOperand[0].mask;
2048 pSignatureEntry->componentType = SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN; /// @todo Proper value? Seems that it is not important.
2049 pSignatureEntry->minPrecision = SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT;
2050 }
2051 }
2052 }
2053
2054 if (RT_FAILURE(rc))
2055 {
2056 return rc;
2057 }
2058
2059 rc = dxbcOutputFinalize(&outctx, w);
2060 if (RT_FAILURE(rc))
2061 {
2062 return rc;
2063 }
2064
2065 dxbcByteWriterFetchData(w, &pInfo->pvBytecode, &pInfo->cbBytecode);
2066 uint32_t *pcOutputToken = (uint32_t *)pInfo->pvBytecode + 1;
2067 *pcOutputToken = pInfo->cbBytecode / 4;
2068
2069#ifdef LOG_ENABLED
2070 if (pInfo->cInputSignature)
2071 {
2072 Log6(("Input signatures:\n"));
2073 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2074 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask));
2075 }
2076 if (pInfo->cOutputSignature)
2077 {
2078 Log6(("Output signatures:\n"));
2079 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2080 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask));
2081 }
2082 if (pInfo->cPatchConstantSignature)
2083 {
2084 Log6(("Patch constant signatures:\n"));
2085 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2086 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask));
2087 }
2088#endif
2089
2090 return VINF_SUCCESS;
2091}
2092
2093
2094void DXShaderFree(DXShaderInfo *pInfo)
2095{
2096 RTMemFree(pInfo->pvBytecode);
2097 RT_ZERO(*pInfo);
2098}
2099
2100
2101#if 0 // Unused. Replaced with dxbcSemanticInfo.
2102static char const *dxbcSemanticName(SVGA3dDXSignatureSemanticName enmSemanticName)
2103{
2104 /* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics */
2105 switch (enmSemanticName)
2106 {
2107 case SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION: return "SV_Position";
2108 case SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE: return "SV_ClipDistance";
2109 case SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE: return "SV_CullDistance";
2110 case SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX: return "SV_RenderTargetArrayIndex";
2111 case SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX: return "SV_ViewportArrayIndex";
2112 case SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID: return "SV_VertexID";
2113 case SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID: return "SV_PrimitiveID";
2114 case SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID: return "SV_InstanceID";
2115 case SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE: return "SV_IsFrontFace";
2116 case SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX: return "SV_SampleIndex";
2117 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return "SV_FinalQuadUeq0EdgeTessFactor";
2118 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return "SV_FinalQuadVeq0EdgeTessFactor";
2119 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return "SV_FinalQuadUeq1EdgeTessFactor";
2120 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return "SV_FinalQuadVeq1EdgeTessFactor";
2121 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: return "SV_FinalQuadUInsideTessFactor";
2122 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: return "SV_FinalQuadVInsideTessFactor";
2123 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriUeq0EdgeTessFactor";
2124 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriVeq0EdgeTessFactor";
2125 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriWeq0EdgeTessFactor";
2126 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR: return "SV_FinalTriInsideTessFactor";
2127 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR: return "SV_FinalLineDetailTessFactor";
2128 case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR: return "SV_FinalLineDensityTessFactor";
2129 default:
2130 Assert(enmSemanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
2131 break;
2132 }
2133 /* Generic. Arbitrary name. It does not have any meaning. */
2134 return "ATTRIB";
2135}
2136#endif
2137
2138
2139/* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics
2140 * Type:
2141 * 0 - undefined
2142 * 1 - unsigned int
2143 * 2 - signed int
2144 * 3 - float
2145 */
2146typedef struct VGPUSemanticInfo
2147{
2148 char const *pszName;
2149 uint32_t u32Type;
2150} VGPUSemanticInfo;
2151
2152static VGPUSemanticInfo const g_aSemanticInfo[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX] =
2153{
2154 { "ATTRIB", 0 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2155 { "SV_Position", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1
2156 { "SV_ClipDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2
2157 { "SV_CullDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3
2158 { "SV_RenderTargetArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4
2159 { "SV_ViewportArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5
2160 { "SV_VertexID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6
2161 { "SV_PrimitiveID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7
2162 { "SV_InstanceID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8
2163 { "SV_IsFrontFace", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9
2164 { "SV_SampleIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10
2165 /** @todo Is this a correct name for all TessFactors? */
2166 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11
2167 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12
2168 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13
2169 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14
2170 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15
2171 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16
2172 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17
2173 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18
2174 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19
2175 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20
2176 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21
2177 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22
2178};
2179
2180static VGPUSemanticInfo const g_SemanticPSOutput =
2181 { "SV_TARGET", 3 }; // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2182
2183
2184static VGPUSemanticInfo const *dxbcSemanticInfo(DXShaderInfo const *pInfo, SVGA3dDXSignatureSemanticName enmSemanticName, uint32_t u32BlobType)
2185{
2186 if (enmSemanticName < RT_ELEMENTS(g_aSemanticInfo))
2187 {
2188 if ( enmSemanticName == 0
2189 && pInfo->enmProgramType == VGPU10_PIXEL_SHADER
2190 && u32BlobType == DXBC_BLOB_TYPE_OSGN)
2191 return &g_SemanticPSOutput;
2192 return &g_aSemanticInfo[enmSemanticName];
2193 }
2194 return &g_aSemanticInfo[0];
2195}
2196
2197
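/* Write an input or output signature blob (ISGN/OSGN). If the guest supplied the signatures
 * (pInfo->fGuestSignatures) the entries are emitted in the given order; otherwise the entries
 * reconstructed from the shader code are sorted by register index first (see the comment in
 * the else branch below).
 */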
2198static int dxbcCreateIOSGNBlob(DXShaderInfo const *pInfo, DXBCHeader *pHdr, uint32_t u32BlobType,
2199 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature, DXBCByteWriter *w)
2200{
2201 AssertReturn(cSignature <= SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS, VERR_INVALID_PARAMETER);
2202
2203 uint32_t cbBlob = RT_UOFFSETOF_DYN(DXBCBlobIOSGN, aElement[cSignature]);
2204 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2205 return VERR_NO_MEMORY;
2206
2207 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2208 pHdrBlob->u32BlobType = u32BlobType;
2209 // pHdrBlob->cbBlob = 0;
2210
2211 DXBCBlobIOSGN *pHdrISGN = (DXBCBlobIOSGN *)&pHdrBlob[1];
2212 pHdrISGN->cElement = cSignature;
2213 pHdrISGN->offElement = RT_UOFFSETOF(DXBCBlobIOSGN, aElement[0]);
2214
2215 if (pInfo->fGuestSignatures)
2216 {
2217 uint32_t aSemanticIdx[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX];
2218 RT_ZERO(aSemanticIdx);
2219 for (uint32_t iSignature = 0; iSignature < cSignature; ++iSignature)
2220 {
2221 SVGA3dDXSignatureEntry const *src = &paSignature[iSignature];
2222 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iSignature];
2223
2224 ASSERT_GUEST_RETURN(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, VERR_INVALID_PARAMETER);
2225 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2226
2227 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without hdr). */
2228 /* Use the register index as the semantic index for generic attributes in order to
2229 * produce compatible semantic names between shaders.
2230 */
2231 dst->idxSemantic = src->semanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
2232 ? src->registerIndex
2233 : aSemanticIdx[src->semanticName]++;
2234 dst->enmSystemValue = src->semanticName;
2235 dst->enmComponentType = src->componentType;
2236 dst->idxRegister = src->registerIndex;
2237 dst->u.mask = src->mask;
2238
2239 /* Figure out the semantic name for this element. */
2240 char const * const pszElementName = pSemanticInfo->pszName;
2241 uint32_t const cbElementName = (uint32_t)strlen(pszElementName) + 1;
2242
2243 if (!dxbcByteWriterCanWrite(w, cbBlob + cbElementName))
2244 return VERR_NO_MEMORY;
2245
2246 char *pszElementNameDst = (char *)pHdrISGN + dst->offElementName;
2247 memcpy(pszElementNameDst, pszElementName, cbElementName);
2248
2249 cbBlob += cbElementName;
2250 }
2251 }
2252 else
2253 {
2254 /* If the signature has been created from the shader code, then sort the signature entries
2255         * by the register index, because the 3D API requires this.
2256 *
2257 * signature semantic reg -> signature semantic reg
2258 * [0] 0 2 [5] 0 0
2259 * [1] 1 1 [3] 1 0
2260 * [2] 2 0 [2] 2 0
2261 * [3] 1 0 [4] 0 1
2262 * [4] 0 1 [1] 1 1
2263 * [5] 0 0 [0] 0 2
2264 */
2265
2266 /* aIdxSignature contains signature indices. aIdxSignature[s][0] = signature index for register 0 for semantic s. */
2267 uint32_t aIdxSignature[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX][SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS];
2268 memset(aIdxSignature, 0xFF, sizeof(aIdxSignature));
2269 for (uint32_t i = 0; i < cSignature; ++i)
2270 {
2271 SVGA3dDXSignatureEntry const *src = &paSignature[i];
2272 ASSERT_GUEST_RETURN(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, VERR_INVALID_PARAMETER);
2273 if (src->registerIndex == 0xFFFFFFFF)
2274 {
2275 /* oDepth for PS output. */
2276 ASSERT_GUEST_RETURN(pInfo->enmProgramType == VGPU10_PIXEL_SHADER, VERR_INVALID_PARAMETER);
2277
2278 /* Must be placed last in the signature. */
2279 ASSERT_GUEST_RETURN(aIdxSignature[src->semanticName][cSignature - 1] == 0xFFFFFFFF, VERR_INVALID_PARAMETER);
2280 aIdxSignature[src->semanticName][cSignature - 1] = i;
2281 continue;
2282 }
2283
2284 ASSERT_GUEST_RETURN(src->registerIndex < SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS, VERR_INVALID_PARAMETER);
2285 ASSERT_GUEST_RETURN(aIdxSignature[src->semanticName][src->registerIndex] == 0xFFFFFFFF, VERR_INVALID_PARAMETER);
2286 aIdxSignature[src->semanticName][src->registerIndex] = i;
2287 }
2288
2289 uint32_t aSemanticIdx[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX];
2290 RT_ZERO(aSemanticIdx);
2291 uint32_t iElement = 0;
2292 for (uint32_t iReg = 0; iReg < SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS; ++iReg)
2293 {
2294 for (unsigned iSemanticName = 0; iSemanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX; ++iSemanticName)
2295 {
2296 if (aIdxSignature[iSemanticName][iReg] == 0xFFFFFFFF) /* This register is unused. */
2297 continue;
2298
2299 SVGA3dDXSignatureEntry const *src = &paSignature[aIdxSignature[iSemanticName][iReg]];
2300
2301 AssertReturn(iElement < cSignature, VERR_INTERNAL_ERROR);
2302 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iElement];
2303
2304 ASSERT_GUEST_RETURN(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, VERR_INVALID_PARAMETER);
2305 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2306
2307 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without hdr). */
2308 /* Use the register index as the semantic index for generic attributes in order to
2309 * produce compatible semantic names between shaders.
2310 */
2311 dst->idxSemantic = src->semanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
2312 ? src->registerIndex
2313 : aSemanticIdx[src->semanticName]++;
2314 dst->enmSystemValue = src->semanticName;
2315 /* Set type = 'undefined' to make the type match types between shader stages.
2316 * ('src->componentType ? src->componentType : pSemanticInfo->u32Type;' was used in the past)
2317 */
2318 dst->enmComponentType = 0;
2319 dst->idxRegister = src->registerIndex;
2320 dst->u.m.mask = src->mask;
2321 if (u32BlobType == DXBC_BLOB_TYPE_OSGN)
2322 dst->u.m.mask2 = 0;
2323 else
2324 dst->u.m.mask2 = src->mask;
2325
2326 /* Figure out the semantic name for this element. */
2327 char const * const pszElementName = pSemanticInfo->pszName;
2328 uint32_t const cbElementName = (uint32_t)strlen(pszElementName) + 1;
2329
2330 Log(("[%d] semantic %d, register %d, %s%d\n", iElement, dst->enmSystemValue, dst->idxRegister, pszElementName, dst->idxSemantic));
2331
2332 if (!dxbcByteWriterCanWrite(w, cbBlob + cbElementName))
2333 return VERR_NO_MEMORY;
2334
2335 char *pszElementNameDst = (char *)pHdrISGN + dst->offElementName;
2336 memcpy(pszElementNameDst, pszElementName, cbElementName);
2337
2338 cbBlob += cbElementName;
2339 ++iElement;
2340 }
2341 }
2342 }
2343
2344 /* Blobs are 4 bytes aligned. Commit the blob data. */
2345 cbBlob = RT_ALIGN_32(cbBlob, 4);
2346 pHdrBlob->cbBlob = cbBlob;
2347 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2348 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2349 return VINF_SUCCESS;
2350}
2351
2352
2353static int dxbcCreateSHDRBlob(DXBCHeader *pHdr, uint32_t u32BlobType,
2354 void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2355{
2356 uint32_t cbBlob = cbShader;
2357 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2358 return VERR_NO_MEMORY;
2359
2360 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2361 pHdrBlob->u32BlobType = u32BlobType;
2362 // pHdrBlob->cbBlob = 0;
2363
2364 memcpy(&pHdrBlob[1], pvShader, cbShader);
2365
2366 /* Blobs are 4 bytes aligned. Commit the blob data. */
2367 cbBlob = RT_ALIGN_32(cbBlob, 4);
2368 pHdrBlob->cbBlob = cbBlob;
2369 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2370 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2371 return VINF_SUCCESS;
2372}
2373
2374
2375/*
2376 * Create a DXBC container with signature and shader code data blobs.
2377 */
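/* The resulting layout is the DXBC header with three blob offsets followed by the ISGN, OSGN
 * and SHDR blobs; the hash at the start of the header is computed over everything from the
 * u32Version field to the end of the container.
 */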
2378static int dxbcCreateFromInfo(DXShaderInfo const *pInfo, void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2379{
2380 int rc;
2381
2382 /* Create a DXBC container with ISGN, OSGN and SHDR blobs. */
2383 uint32_t const cBlob = 3;
2384 uint32_t const cbHdr = RT_UOFFSETOF(DXBCHeader, aBlobOffset[cBlob]); /* Header with blob offsets. */
2385 if (!dxbcByteWriterCanWrite(w, cbHdr))
2386 return VERR_NO_MEMORY;
2387
2388 /* Container header. */
2389 DXBCHeader *pHdr = (DXBCHeader *)dxbcByteWriterPtr(w);
2390 pHdr->u32DXBC = DXBC_MAGIC;
2391 // RT_ZERO(pHdr->au8Hash);
2392 pHdr->u32Version = 1;
2393 pHdr->cbTotal = cbHdr;
2394 pHdr->cBlob = cBlob;
2395 //RT_ZERO(pHdr->aBlobOffset);
2396 dxbcByteWriterCommit(w, cbHdr);
2397
2398 /* Blobs. */
2399 uint32_t iBlob = 0;
2400
2401 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2402 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_ISGN, pInfo->cInputSignature, &pInfo->aInputSignature[0], w);
2403 AssertRCReturn(rc, rc);
2404
2405 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2406 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], w);
2407 AssertRCReturn(rc, rc);
2408
2409 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2410 rc = dxbcCreateSHDRBlob(pHdr, DXBC_BLOB_TYPE_SHDR, pvShader, cbShader, w);
2411 AssertRCReturn(rc, rc);
2412
2413 AssertCompile(RT_UOFFSETOF(DXBCHeader, u32Version) == 0x14);
2414 dxbcHash(&pHdr->u32Version, pHdr->cbTotal - RT_UOFFSETOF(DXBCHeader, u32Version), pHdr->au8Hash);
2415
2416 return VINF_SUCCESS;
2417}
2418
2419
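/* Build a DXBC container (signatures + shader code) from the information gathered by
 * DXShaderParse and return the container buffer and its size to the caller.
 */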
2420int DXShaderCreateDXBC(DXShaderInfo const *pInfo, void **ppvDXBC, uint32_t *pcbDXBC)
2421{
2422 /* Build DXBC container. */
2423 int rc;
2424 DXBCByteWriter dxbcByteWriter;
2425 DXBCByteWriter *w = &dxbcByteWriter;
2426 if (dxbcByteWriterInit(w, 4096 + pInfo->cbBytecode))
2427 {
2428 rc = dxbcCreateFromInfo(pInfo, pInfo->pvBytecode, pInfo->cbBytecode, w);
2429 if (RT_SUCCESS(rc))
2430 dxbcByteWriterFetchData(w, ppvDXBC, pcbDXBC);
2431 }
2432 else
2433 rc = VERR_NO_MEMORY;
2434 return rc;
2435}
2436
2437
2438static char const *dxbcGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, uint32_t u32BlobType,
2439 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature,
2440 SVGA3dDXSignatureSemanticName *pSemanticName)
2441{
2442 for (uint32_t i = 0; i < cSignature; ++i)
2443 {
2444 SVGA3dDXSignatureEntry const *p = &paSignature[i];
2445 if (p->registerIndex == idxRegister)
2446 {
2447 AssertReturn(p->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, NULL);
2448 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, p->semanticName, u32BlobType);
2449 *pSemanticName = p->semanticName;
2450 return pSemanticInfo->pszName;
2451 }
2452 }
2453 return NULL;
2454}
2455
2456char const *DXShaderGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, SVGA3dDXSignatureSemanticName *pSemanticName)
2457{
2458 return dxbcGetOutputSemanticName(pInfo, idxRegister, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pSemanticName);
2459}
2460
2461VGPU10_RESOURCE_RETURN_TYPE DXShaderResourceReturnTypeFromFormat(SVGA3dSurfaceFormat format)
2462{
2463 /** @todo This is auto-generated from format names and needs a review. */
2464 switch (format)
2465 {
2466 case SVGA3D_R32G32B32A32_UINT: return VGPU10_RETURN_TYPE_UINT;
2467 case SVGA3D_R32G32B32A32_SINT: return VGPU10_RETURN_TYPE_SINT;
2468 case SVGA3D_R32G32B32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2469 case SVGA3D_R32G32B32_UINT: return VGPU10_RETURN_TYPE_UINT;
2470 case SVGA3D_R32G32B32_SINT: return VGPU10_RETURN_TYPE_SINT;
2471 case SVGA3D_R16G16B16A16_UINT: return VGPU10_RETURN_TYPE_UINT;
2472 case SVGA3D_R16G16B16A16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2473 case SVGA3D_R16G16B16A16_SINT: return VGPU10_RETURN_TYPE_SINT;
2474 case SVGA3D_R32G32_UINT: return VGPU10_RETURN_TYPE_UINT;
2475 case SVGA3D_R32G32_SINT: return VGPU10_RETURN_TYPE_SINT;
2476 case SVGA3D_D32_FLOAT_S8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2477 case SVGA3D_R32_FLOAT_X8X24: return VGPU10_RETURN_TYPE_FLOAT;
2478 case SVGA3D_X32_G8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2479 case SVGA3D_R10G10B10A2_UINT: return VGPU10_RETURN_TYPE_UINT;
2480 case SVGA3D_R11G11B10_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2481 case SVGA3D_R8G8B8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2482 case SVGA3D_R8G8B8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2483 case SVGA3D_R8G8B8A8_UINT: return VGPU10_RETURN_TYPE_UINT;
2484 case SVGA3D_R8G8B8A8_SINT: return VGPU10_RETURN_TYPE_SINT;
2485 case SVGA3D_R16G16_UINT: return VGPU10_RETURN_TYPE_UINT;
2486 case SVGA3D_R16G16_SINT: return VGPU10_RETURN_TYPE_SINT;
2487 case SVGA3D_D32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2488 case SVGA3D_R32_UINT: return VGPU10_RETURN_TYPE_UINT;
2489 case SVGA3D_R32_SINT: return VGPU10_RETURN_TYPE_SINT;
2490 case SVGA3D_D24_UNORM_S8_UINT: return VGPU10_RETURN_TYPE_UNORM;
2491 case SVGA3D_R24_UNORM_X8: return VGPU10_RETURN_TYPE_UNORM;
2492 case SVGA3D_X24_G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2493 case SVGA3D_R8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2494 case SVGA3D_R8G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2495 case SVGA3D_R8G8_SINT: return VGPU10_RETURN_TYPE_SINT;
2496 case SVGA3D_R16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2497 case SVGA3D_R16_UINT: return VGPU10_RETURN_TYPE_UINT;
2498 case SVGA3D_R16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2499 case SVGA3D_R16_SINT: return VGPU10_RETURN_TYPE_SINT;
2500 case SVGA3D_R8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2501 case SVGA3D_R8_UINT: return VGPU10_RETURN_TYPE_UINT;
2502 case SVGA3D_R8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2503 case SVGA3D_R8_SINT: return VGPU10_RETURN_TYPE_SINT;
2504 case SVGA3D_R8G8_B8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2505 case SVGA3D_G8R8_G8B8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2506 case SVGA3D_BC1_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2507 case SVGA3D_BC2_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2508 case SVGA3D_BC3_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2509 case SVGA3D_BC4_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2510 case SVGA3D_BC5_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2511 case SVGA3D_R10G10B10_XR_BIAS_A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2512 case SVGA3D_B8G8R8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2513 case SVGA3D_B8G8R8X8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2514 case SVGA3D_R32G32B32A32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2515 case SVGA3D_R16G16B16A16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2516 case SVGA3D_R16G16B16A16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2517 case SVGA3D_R32G32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2518 case SVGA3D_R10G10B10A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2519 case SVGA3D_R8G8B8A8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2520 case SVGA3D_R16G16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2521 case SVGA3D_R16G16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2522 case SVGA3D_R16G16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2523 case SVGA3D_R32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2524 case SVGA3D_R8G8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2525 case SVGA3D_R16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2526 case SVGA3D_D16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2527 case SVGA3D_A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2528 case SVGA3D_BC1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2529 case SVGA3D_BC2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2530 case SVGA3D_BC3_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2531 case SVGA3D_B5G6R5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2532 case SVGA3D_B5G5R5A1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2533 case SVGA3D_B8G8R8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2534 case SVGA3D_B8G8R8X8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2535 case SVGA3D_BC4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2536 case SVGA3D_BC5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2537 case SVGA3D_B4G4R4A4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2538 case SVGA3D_BC7_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2539 case SVGA3D_BC7_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2540 default:
2541 break;
2542 }
2543 return VGPU10_RETURN_TYPE_UNORM;
2544}
2545
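/* Patch the dcl_resource declarations recorded by DXShaderParse (aOffDclResource) with the
 * dimensions and return types supplied by the caller; entries beyond cResources default to
 * TEXTURE2D with a FLOAT return type.
 */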
2546int DXShaderUpdateResources(DXShaderInfo const *pInfo, VGPU10_RESOURCE_DIMENSION *paResourceDimension,
2547 VGPU10_RESOURCE_RETURN_TYPE *paResourceReturnType, uint32_t cResources)
2548{
2549 for (uint32_t i = 0; i < pInfo->cDclResource; ++i)
2550 {
2551 VGPU10_RESOURCE_DIMENSION const resourceDimension = i < cResources ? paResourceDimension[i] : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2552 AssertContinue(resourceDimension <= VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
2553
2554 VGPU10_RESOURCE_RETURN_TYPE const resourceReturnType = i < cResources ? paResourceReturnType[i] : VGPU10_RETURN_TYPE_FLOAT;
2555 AssertContinue(resourceReturnType <= VGPU10_RETURN_TYPE_MIXED);
2556
2557 uint32_t const offToken = pInfo->aOffDclResource[i];
2558 AssertContinue(offToken < pInfo->cbBytecode);
2559 uint32_t *paToken = (uint32_t *)((uintptr_t)pInfo->pvBytecode + offToken);
2560
2561 VGPU10OpcodeToken0 *pOpcode = (VGPU10OpcodeToken0 *)&paToken[0];
2562 pOpcode->resourceDimension = resourceDimension;
2563 // paToken[1] unmodified
2564 // paToken[2] unmodified
2565 VGPU10ResourceReturnTypeToken *pReturnTypeToken = (VGPU10ResourceReturnTypeToken *)&paToken[3];
2566 pReturnTypeToken->component0 = (uint8_t)resourceReturnType;
2567 pReturnTypeToken->component1 = (uint8_t)resourceReturnType;
2568 pReturnTypeToken->component2 = (uint8_t)resourceReturnType;
2569 pReturnTypeToken->component3 = (uint8_t)resourceReturnType;
2570 }
2571
2572 return VINF_SUCCESS;
2573}
2574
2575#ifdef DXBC_STANDALONE_TEST
2576static int dxbcCreateFromBytecode(void const *pvShaderCode, uint32_t cbShaderCode, void **ppvDXBC, uint32_t *pcbDXBC)
2577{
2578 /* Parse the shader bytecode and create DXBC container with resource, signature and shader bytecode blobs. */
2579 DXShaderInfo info;
2580 RT_ZERO(info);
2581 int rc = DXShaderParse(pvShaderCode, cbShaderCode, &info);
2582 if (RT_SUCCESS(rc))
2583 rc = DXShaderCreateDXBC(&info, ppvDXBC, pcbDXBC);
2584 return rc;
2585}
2586
2587static int parseShaderVM(void const *pvShaderCode, uint32_t cbShaderCode)
2588{
2589 void *pv = NULL;
2590 uint32_t cb = 0;
2591 int rc = dxbcCreateFromBytecode(pvShaderCode, cbShaderCode, &pv, &cb);
2592 if (RT_SUCCESS(rc))
2593 {
2594 /* Hexdump DXBC */
2595 printf("{\n");
2596 uint8_t *pu8 = (uint8_t *)pv;
2597 for (uint32_t i = 0; i < cb; ++i)
2598 {
2599 if ((i % 16) == 0)
2600 {
2601 if (i > 0)
2602 printf(",\n");
2603
2604 printf(" 0x%02x", pu8[i]);
2605 }
2606 else
2607 {
2608 printf(", 0x%02x", pu8[i]);
2609 }
2610 }
2611 printf("\n");
2612 printf("};\n");
2613
2614 RTMemFree(pv);
2615 }
2616
2617 return rc;
2618}
2619
2620static DXBCBlobHeader *dxbcFindBlob(DXBCHeader *pDXBCHeader, uint32_t u32BlobType)
2621{
2622 uint8_t const *pu8DXBCBegin = (uint8_t *)pDXBCHeader;
2623 for (uint32_t i = 0; i < pDXBCHeader->cBlob; ++i)
2624 {
2625 DXBCBlobHeader *pCurrentBlob = (DXBCBlobHeader *)&pu8DXBCBegin[pDXBCHeader->aBlobOffset[i]];
2626 if (pCurrentBlob->u32BlobType == u32BlobType)
2627 return pCurrentBlob;
2628 }
2629 return NULL;
2630}
2631
2632static int dxbcExtractShaderCode(DXBCHeader *pDXBCHeader, void **ppvCode, uint32_t *pcbCode)
2633{
2634 DXBCBlobHeader *pBlob = dxbcFindBlob(pDXBCHeader, DXBC_BLOB_TYPE_SHDR);
2635 AssertReturn(pBlob, VERR_NOT_IMPLEMENTED);
2636
2637 DXBCBlobSHDR *pSHDR = (DXBCBlobSHDR *)&pBlob[1];
2638 *pcbCode = pSHDR->cToken * 4;
2639 *ppvCode = RTMemAlloc(*pcbCode);
2640 AssertReturn(*ppvCode, VERR_NO_MEMORY);
2641
2642 memcpy(*ppvCode, pSHDR, *pcbCode);
2643 return VINF_SUCCESS;
2644}
2645
2646static int parseShaderDXBC(void const *pvDXBC)
2647{
2648 DXBCHeader *pDXBCHeader = (DXBCHeader *)pvDXBC;
2649 void *pvShaderCode = NULL;
2650 uint32_t cbShaderCode = 0;
2651 int rc = dxbcExtractShaderCode(pDXBCHeader, &pvShaderCode, &cbShaderCode);
2652 if (RT_SUCCESS(rc))
2653 {
2654 rc = parseShaderVM(pvShaderCode, cbShaderCode);
2655 RTMemFree(pvShaderCode);
2656 }
2657 return rc;
2658}
2659#endif /* DXBC_STANDALONE_TEST */