VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/DevVGA-SVGA3d-dx-shader.cpp@ 91801

最後變更 在這個檔案從91801是 91607,由 vboxsync 提交於 3 年 前

Devices/Graphics: parser should be able to apply modifications; logging: bugref:9830

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 95.8 KB
 
1/* $Id: DevVGA-SVGA3d-dx-shader.cpp 91607 2021-10-07 07:31:24Z vboxsync $ */
2/** @file
3 * DevVMWare - VMWare SVGA device - VGPU10+ (DX) shader utilities.
4 */
5
6/*
7 * Copyright (C) 2020-2021 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
23#include <VBox/AssertGuest.h>
24#include <VBox/log.h>
25
26#include <iprt/asm.h>
27#include <iprt/md5.h>
28#include <iprt/mem.h>
29#include <iprt/string.h>
30
31#include "DevVGA-SVGA3d-dx-shader.h"
32
33
34/*
35 *
36 * DXBC shader binary format definitions.
37 *
38 */
39
/**
 * DXBC container header.
 */
typedef struct DXBCHeader
{
    uint32_t u32DXBC;           /**< Magic: 0x43425844, i.e. the characters 'D', 'X', 'B', 'C'. */
    uint8_t  au8Hash[16];       /**< Modified MD5 hash. See dxbcHash. */
    uint32_t u32Version;        /**< Version: 1. */
    uint32_t cbTotal;           /**< Total size in bytes, this header included. */
    uint32_t cBlob;             /**< Number of entries in the aBlobOffset array. */
    uint32_t aBlobOffset[1];    /**< Blob offsets, counted from the start of the DXBC header. */
} DXBCHeader;

/** Expected value of DXBCHeader::u32DXBC. */
#define DXBC_MAGIC RT_MAKE_U32_FROM_U8('D', 'X', 'B', 'C')
52
/**
 * DXBC blob header.
 *
 * The blob's data follows directly after this header.
 */
typedef struct DXBCBlobHeader
{
    uint32_t u32BlobType;       /**< FourCC code, one of DXBC_BLOB_TYPE_*. */
    uint32_t cbBlob;            /**< Size of the blob, the blob header excluded. 4 bytes aligned. */
} DXBCBlobHeader;

/** @name DXBC blob types: FourCC codes stored in DXBCBlobHeader::u32BlobType.
 * @{ */
#define DXBC_BLOB_TYPE_ISGN RT_MAKE_U32_FROM_U8('I', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_OSGN RT_MAKE_U32_FROM_U8('O', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_SHDR RT_MAKE_U32_FROM_U8('S', 'H', 'D', 'R')
/** @todo More... */
/** @} */
66
67/* 'SHDR' blob data format. */
68typedef struct DXBCBlobSHDR
69{
70 VGPU10ProgramToken programToken;
71 uint32_t cToken; /* Number of 32 bit tokens including programToken and cToken. */
72 uint32_t au32Token[1]; /* cToken - 2 number of tokens. */
73} DXBCBlobSHDR;
74
/**
 * One element of an input or output signature.
 */
typedef struct DXBCBlobIOSGNElement
{
    uint32_t offElementName;    /**< Offset of the semantic's name, relative to the start of the blob data. */
    uint32_t idxSemantic;       /**< Semantic index. */
    uint32_t enmSystemValue;    /**< SVGA3dDXSignatureSemanticName */
    uint32_t enmComponentType;  /**< 1 - unsigned, 2 - integer, 3 - float. */
    uint32_t idxRegister;       /**< Shader register index. Elements must be sorted by register index. */
    uint32_t mask  : 8;         /**< Component mask. The lower 4 bits represent the X, Y, Z, W channels. */
    uint32_t mask2 : 8;         /**< Which components are used in the shader. */
    uint32_t pad   : 16;        /**< Padding up to a full 32 bit word. */
} DXBCBlobIOSGNElement;
87
88/* 'ISGN' and 'OSGN' blob data format. */
89typedef struct DXBCBlobIOSGN
90{
91 uint32_t cElement; /* Number of signature elements. */
92 uint32_t offElement; /* Offset of the first element from the start of the blob. Equals to 8. */
93 DXBCBlobIOSGNElement aElement[1]; /* Signature elements. Size is cElement. */
94 /* Followed by ASCIIZ semantic names. */
95} DXBCBlobIOSGN;
96
97
98/*
99 * VGPU10 shader parser definitions.
100 */
101
/**
 * Parsed information about one index of an operand.
 */
typedef struct VGPUOperandIndex
{
    uint32_t indexRepresentation;           /**< VGPU10_OPERAND_INDEX_REPRESENTATION */
    uint64_t iOperandImmediate;             /**< Immediate index value. Needs up to a qword. */
    struct VGPUOperand *pOperandRelative;   /**< For VGPU10_OPERAND_INDEX_*RELATIVE. */
} VGPUOperandIndex;
109
110/* Parsed info about an operand. */
111typedef struct VGPUOperand
112{
113 uint32_t numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */
114 uint32_t selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
115 uint32_t mask : 4; /* 4-bits X, Y, Z, W mask for VGPU10_OPERAND_4_COMPONENT_MASK_MODE. */
116 uint32_t operandType : 8; /* VGPU10_OPERAND_TYPE */
117 uint32_t indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */
118 VGPUOperandIndex aOperandIndex[VGPU10_OPERAND_INDEX_3D]; /* Up to 3. */
119 uint32_t aImm[4]; /* Immediate values for VGPU10_OPERAND_TYPE_IMMEDIATE* */
120} VGPUOperand;
121
122/* Parsed info about an opcode. */
123typedef struct VGPUOpcode
124{
125 uint32_t cOpcodeToken; /* Number of tokens for this operation. */
126 uint32_t opcodeType; /* VGPU10_OPCODE_* */
127 uint32_t semanticName; /* SVGA3dDXSignatureSemanticName for system value declarations. */
128 uint32_t cOperand; /* Number of operands for this instruction. */
129 uint32_t aIdxOperand[8]; /* Indices of the instruction operands in the aValOperand array. */
130 /* 8 should be enough for everyone. */
131 VGPUOperand aValOperand[16]; /* Operands including VGPU10_OPERAND_INDEX_*RELATIVE if they are used: */
132 /* Operand1, VGPU10_OPERAND_INDEX_*RELATIVE for Operand1, ... */
133 /* ... */
134 /* OperandN, VGPU10_OPERAND_INDEX_*RELATIVE for OperandN, ... */
135 /* 16 probably should be enough for everyone. */
136 uint32_t const *paOpcodeToken; /* Pointer to opcode tokens in the input buffer. */
137} VGPUOpcode;
138
/**
 * Static information about a VGPU10 opcode.
 */
typedef struct VGPUOpcodeInfo
{
    uint32_t cOperand;  /**< Number of operands for this opcode. */
} VGPUOpcodeInfo;
143
144static VGPUOpcodeInfo const g_aOpcodeInfo[] =
145{
146 { 3 }, /* VGPU10_OPCODE_ADD */
147 { 3 }, /* VGPU10_OPCODE_AND */
148 { 0 }, /* VGPU10_OPCODE_BREAK */
149 { 1 }, /* VGPU10_OPCODE_BREAKC */
150 { 1 }, /* VGPU10_OPCODE_CALL */
151 { 2 }, /* VGPU10_OPCODE_CALLC */
152 { 1 }, /* VGPU10_OPCODE_CASE */
153 { 0 }, /* VGPU10_OPCODE_CONTINUE */
154 { 1 }, /* VGPU10_OPCODE_CONTINUEC */
155 { 0 }, /* VGPU10_OPCODE_CUT */
156 { 0 }, /* VGPU10_OPCODE_DEFAULT */
157 { 2 }, /* VGPU10_OPCODE_DERIV_RTX */
158 { 2 }, /* VGPU10_OPCODE_DERIV_RTY */
159 { 1 }, /* VGPU10_OPCODE_DISCARD */
160 { 3 }, /* VGPU10_OPCODE_DIV */
161 { 3 }, /* VGPU10_OPCODE_DP2 */
162 { 3 }, /* VGPU10_OPCODE_DP3 */
163 { 3 }, /* VGPU10_OPCODE_DP4 */
164 { 0 }, /* VGPU10_OPCODE_ELSE */
165 { 0 }, /* VGPU10_OPCODE_EMIT */
166 { 0 }, /* VGPU10_OPCODE_EMITTHENCUT */
167 { 0 }, /* VGPU10_OPCODE_ENDIF */
168 { 0 }, /* VGPU10_OPCODE_ENDLOOP */
169 { 0 }, /* VGPU10_OPCODE_ENDSWITCH */
170 { 3 }, /* VGPU10_OPCODE_EQ */
171 { 2 }, /* VGPU10_OPCODE_EXP */
172 { 2 }, /* VGPU10_OPCODE_FRC */
173 { 2 }, /* VGPU10_OPCODE_FTOI */
174 { 2 }, /* VGPU10_OPCODE_FTOU */
175 { 3 }, /* VGPU10_OPCODE_GE */
176 { 3 }, /* VGPU10_OPCODE_IADD */
177 { 1 }, /* VGPU10_OPCODE_IF */
178 { 3 }, /* VGPU10_OPCODE_IEQ */
179 { 3 }, /* VGPU10_OPCODE_IGE */
180 { 3 }, /* VGPU10_OPCODE_ILT */
181 { 4 }, /* VGPU10_OPCODE_IMAD */
182 { 3 }, /* VGPU10_OPCODE_IMAX */
183 { 3 }, /* VGPU10_OPCODE_IMIN */
184 { 4 }, /* VGPU10_OPCODE_IMUL */
185 { 3 }, /* VGPU10_OPCODE_INE */
186 { 2 }, /* VGPU10_OPCODE_INEG */
187 { 3 }, /* VGPU10_OPCODE_ISHL */
188 { 3 }, /* VGPU10_OPCODE_ISHR */
189 { 2 }, /* VGPU10_OPCODE_ITOF */
190 { 1 }, /* VGPU10_OPCODE_LABEL */
191 { 3 }, /* VGPU10_OPCODE_LD */
192 { 4 }, /* VGPU10_OPCODE_LD_MS */
193 { 2 }, /* VGPU10_OPCODE_LOG */
194 { 0 }, /* VGPU10_OPCODE_LOOP */
195 { 3 }, /* VGPU10_OPCODE_LT */
196 { 4 }, /* VGPU10_OPCODE_MAD */
197 { 3 }, /* VGPU10_OPCODE_MIN */
198 { 3 }, /* VGPU10_OPCODE_MAX */
199 { UINT32_MAX }, /* VGPU10_OPCODE_CUSTOMDATA: special opcode */
200 { 2 }, /* VGPU10_OPCODE_MOV */
201 { 4 }, /* VGPU10_OPCODE_MOVC */
202 { 3 }, /* VGPU10_OPCODE_MUL */
203 { 3 }, /* VGPU10_OPCODE_NE */
204 { 0 }, /* VGPU10_OPCODE_NOP */
205 { 2 }, /* VGPU10_OPCODE_NOT */
206 { 3 }, /* VGPU10_OPCODE_OR */
207 { 3 }, /* VGPU10_OPCODE_RESINFO */
208 { 0 }, /* VGPU10_OPCODE_RET */
209 { 1 }, /* VGPU10_OPCODE_RETC */
210 { 2 }, /* VGPU10_OPCODE_ROUND_NE */
211 { 2 }, /* VGPU10_OPCODE_ROUND_NI */
212 { 2 }, /* VGPU10_OPCODE_ROUND_PI */
213 { 2 }, /* VGPU10_OPCODE_ROUND_Z */
214 { 2 }, /* VGPU10_OPCODE_RSQ */
215 { 4 }, /* VGPU10_OPCODE_SAMPLE */
216 { 5 }, /* VGPU10_OPCODE_SAMPLE_C */
217 { 5 }, /* VGPU10_OPCODE_SAMPLE_C_LZ */
218 { 5 }, /* VGPU10_OPCODE_SAMPLE_L */
219 { 6 }, /* VGPU10_OPCODE_SAMPLE_D */
220 { 5 }, /* VGPU10_OPCODE_SAMPLE_B */
221 { 2 }, /* VGPU10_OPCODE_SQRT */
222 { 1 }, /* VGPU10_OPCODE_SWITCH */
223 { 3 }, /* VGPU10_OPCODE_SINCOS */
224 { 4 }, /* VGPU10_OPCODE_UDIV */
225 { 3 }, /* VGPU10_OPCODE_ULT */
226 { 3 }, /* VGPU10_OPCODE_UGE */
227 { 4 }, /* VGPU10_OPCODE_UMUL */
228 { 4 }, /* VGPU10_OPCODE_UMAD */
229 { 3 }, /* VGPU10_OPCODE_UMAX */
230 { 3 }, /* VGPU10_OPCODE_UMIN */
231 { 3 }, /* VGPU10_OPCODE_USHR */
232 { 2 }, /* VGPU10_OPCODE_UTOF */
233 { 3 }, /* VGPU10_OPCODE_XOR */
234 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE */
235 { 1 }, /* VGPU10_OPCODE_DCL_CONSTANT_BUFFER */
236 { 1 }, /* VGPU10_OPCODE_DCL_SAMPLER */
237 { 1 }, /* VGPU10_OPCODE_DCL_INDEX_RANGE */
238 { 0 }, /* VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY */
239 { 0 }, /* VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE */
240 { 0 }, /* VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT */
241 { 1 }, /* VGPU10_OPCODE_DCL_INPUT */
242 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SGV */
243 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SIV */
244 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS */
245 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SGV */
246 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SIV */
247 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT */
248 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SGV */
249 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SIV */
250 { 0 }, /* VGPU10_OPCODE_DCL_TEMPS */
251 { 0 }, /* VGPU10_OPCODE_DCL_INDEXABLE_TEMP */
252 { 0 }, /* VGPU10_OPCODE_DCL_GLOBAL_FLAGS */
253 { UINT32_MAX }, /* VGPU10_OPCODE_VMWARE: special opcode */
254 { 4 }, /* VGPU10_OPCODE_LOD */
255 { 4 }, /* VGPU10_OPCODE_GATHER4 */
256 { 3 }, /* VGPU10_OPCODE_SAMPLE_POS */
257 { 2 }, /* VGPU10_OPCODE_SAMPLE_INFO */
258 { UINT32_MAX }, /* VGPU10_OPCODE_RESERVED1: special opcode */
259 { 0 }, /* VGPU10_OPCODE_HS_DECLS */
260 { 0 }, /* VGPU10_OPCODE_HS_CONTROL_POINT_PHASE */
261 { 0 }, /* VGPU10_OPCODE_HS_FORK_PHASE */
262 { 0 }, /* VGPU10_OPCODE_HS_JOIN_PHASE */
263 { 1 }, /* VGPU10_OPCODE_EMIT_STREAM */
264 { 1 }, /* VGPU10_OPCODE_CUT_STREAM */
265 { 1 }, /* VGPU10_OPCODE_EMITTHENCUT_STREAM */
266 { 1 }, /* VGPU10_OPCODE_INTERFACE_CALL */
267 { 2 }, /* VGPU10_OPCODE_BUFINFO */
268 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_COARSE */
269 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_FINE */
270 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_COARSE */
271 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_FINE */
272 { 5 }, /* VGPU10_OPCODE_GATHER4_C */
273 { 5 }, /* VGPU10_OPCODE_GATHER4_PO */
274 { 6 }, /* VGPU10_OPCODE_GATHER4_PO_C */
275 { 2 }, /* VGPU10_OPCODE_RCP */
276 { 2 }, /* VGPU10_OPCODE_F32TOF16 */
277 { 2 }, /* VGPU10_OPCODE_F16TOF32 */
278 { 4 }, /* VGPU10_OPCODE_UADDC */
279 { 4 }, /* VGPU10_OPCODE_USUBB */
280 { 2 }, /* VGPU10_OPCODE_COUNTBITS */
281 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_HI */
282 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_LO */
283 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_SHI */
284 { 4 }, /* VGPU10_OPCODE_UBFE */
285 { 4 }, /* VGPU10_OPCODE_IBFE */
286 { 5 }, /* VGPU10_OPCODE_BFI */
287 { 2 }, /* VGPU10_OPCODE_BFREV */
288 { 5 }, /* VGPU10_OPCODE_SWAPC */
289 { 1 }, /* VGPU10_OPCODE_DCL_STREAM */
290 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_BODY */
291 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_TABLE */
292 { 0 }, /* VGPU10_OPCODE_DCL_INTERFACE */
293 { 0 }, /* VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT */
294 { 0 }, /* VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT */
295 { 0 }, /* VGPU10_OPCODE_DCL_TESS_DOMAIN */
296 { 0 }, /* VGPU10_OPCODE_DCL_TESS_PARTITIONING */
297 { 0 }, /* VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE */
298 { 0 }, /* VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR */
299 { 0 }, /* VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
300 { 0 }, /* VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
301 { 0 }, /* VGPU10_OPCODE_DCL_THREAD_GROUP */
302 { 1 }, /* VGPU10_OPCODE_DCL_UAV_TYPED */
303 { 1 }, /* VGPU10_OPCODE_DCL_UAV_RAW */
304 { 1 }, /* VGPU10_OPCODE_DCL_UAV_STRUCTURED */
305 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_RAW */
306 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_STRUCTURED */
307 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_RAW */
308 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED */
309 { 3 }, /* VGPU10_OPCODE_LD_UAV_TYPED */
310 { 3 }, /* VGPU10_OPCODE_STORE_UAV_TYPED */
311 { 3 }, /* VGPU10_OPCODE_LD_RAW */
312 { 3 }, /* VGPU10_OPCODE_STORE_RAW */
313 { 4 }, /* VGPU10_OPCODE_LD_STRUCTURED */
314 { 4 }, /* VGPU10_OPCODE_STORE_STRUCTURED */
315 { 3 }, /* VGPU10_OPCODE_ATOMIC_AND */
316 { 3 }, /* VGPU10_OPCODE_ATOMIC_OR */
317 { 3 }, /* VGPU10_OPCODE_ATOMIC_XOR */
318 { 4 }, /* VGPU10_OPCODE_ATOMIC_CMP_STORE */
319 { 3 }, /* VGPU10_OPCODE_ATOMIC_IADD */
320 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMAX */
321 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMIN */
322 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMAX */
323 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMIN */
324 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_ALLOC */
325 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_CONSUME */
326 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IADD */
327 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_AND */
328 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_OR */
329 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_XOR */
330 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_EXCH */
331 { 5 }, /* VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH */
332 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMAX */
333 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMIN */
334 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMAX */
335 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMIN */
336 { 0 }, /* VGPU10_OPCODE_SYNC */
337 { 3 }, /* VGPU10_OPCODE_DADD */
338 { 3 }, /* VGPU10_OPCODE_DMAX */
339 { 3 }, /* VGPU10_OPCODE_DMIN */
340 { 3 }, /* VGPU10_OPCODE_DMUL */
341 { 3 }, /* VGPU10_OPCODE_DEQ */
342 { 3 }, /* VGPU10_OPCODE_DGE */
343 { 3 }, /* VGPU10_OPCODE_DLT */
344 { 3 }, /* VGPU10_OPCODE_DNE */
345 { 2 }, /* VGPU10_OPCODE_DMOV */
346 { 4 }, /* VGPU10_OPCODE_DMOVC */
347 { 2 }, /* VGPU10_OPCODE_DTOF */
348 { 2 }, /* VGPU10_OPCODE_FTOD */
349 { 3 }, /* VGPU10_OPCODE_EVAL_SNAPPED */
350 { 3 }, /* VGPU10_OPCODE_EVAL_SAMPLE_INDEX */
351 { 2 }, /* VGPU10_OPCODE_EVAL_CENTROID */
352 { 0 }, /* VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT */
353 { 0 }, /* VGPU10_OPCODE_ABORT */
354 { 0 }, /* VGPU10_OPCODE_DEBUG_BREAK */
355 { 0 }, /* VGPU10_OPCODE_RESERVED0 */
356 { 3 }, /* VGPU10_OPCODE_DDIV */
357 { 4 }, /* VGPU10_OPCODE_DFMA */
358 { 2 }, /* VGPU10_OPCODE_DRCP */
359 { 4 }, /* VGPU10_OPCODE_MSAD */
360 { 2 }, /* VGPU10_OPCODE_DTOI */
361 { 2 }, /* VGPU10_OPCODE_DTOU */
362 { 2 }, /* VGPU10_OPCODE_ITOD */
363 { 2 }, /* VGPU10_OPCODE_UTOD */
364};
365AssertCompile(RT_ELEMENTS(g_aOpcodeInfo) == VGPU10_NUM_OPCODES);
366
367#ifdef LOG_ENABLED
/*
 *
 * Helpers which translate VGPU10 shader constants to strings.
 *
 */

/**
 * Emits a switch case for the constant @a a_Id which returns the spelling of
 * @a a_Id as a string literal.  The terminating semicolon is supplied by the
 * user of the macro.
 */
#define SVGA_CASE_ID2STR(a_Id) case a_Id: return #a_Id
375
376static const char *dxbcOpcodeToString(uint32_t opcodeType)
377{
378 VGPU10_OPCODE_TYPE enm = (VGPU10_OPCODE_TYPE)opcodeType;
379 switch (enm)
380 {
381 SVGA_CASE_ID2STR(VGPU10_OPCODE_ADD);
382 SVGA_CASE_ID2STR(VGPU10_OPCODE_AND);
383 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAK);
384 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAKC);
385 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALL);
386 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALLC);
387 SVGA_CASE_ID2STR(VGPU10_OPCODE_CASE);
388 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUE);
389 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUEC);
390 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT);
391 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEFAULT);
392 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX);
393 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY);
394 SVGA_CASE_ID2STR(VGPU10_OPCODE_DISCARD);
395 SVGA_CASE_ID2STR(VGPU10_OPCODE_DIV);
396 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP2);
397 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP3);
398 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP4);
399 SVGA_CASE_ID2STR(VGPU10_OPCODE_ELSE);
400 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT);
401 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT);
402 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDIF);
403 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDLOOP);
404 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDSWITCH);
405 SVGA_CASE_ID2STR(VGPU10_OPCODE_EQ);
406 SVGA_CASE_ID2STR(VGPU10_OPCODE_EXP);
407 SVGA_CASE_ID2STR(VGPU10_OPCODE_FRC);
408 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOI);
409 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOU);
410 SVGA_CASE_ID2STR(VGPU10_OPCODE_GE);
411 SVGA_CASE_ID2STR(VGPU10_OPCODE_IADD);
412 SVGA_CASE_ID2STR(VGPU10_OPCODE_IF);
413 SVGA_CASE_ID2STR(VGPU10_OPCODE_IEQ);
414 SVGA_CASE_ID2STR(VGPU10_OPCODE_IGE);
415 SVGA_CASE_ID2STR(VGPU10_OPCODE_ILT);
416 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAD);
417 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAX);
418 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMIN);
419 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMUL);
420 SVGA_CASE_ID2STR(VGPU10_OPCODE_INE);
421 SVGA_CASE_ID2STR(VGPU10_OPCODE_INEG);
422 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHL);
423 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHR);
424 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOF);
425 SVGA_CASE_ID2STR(VGPU10_OPCODE_LABEL);
426 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD);
427 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_MS);
428 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOG);
429 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOOP);
430 SVGA_CASE_ID2STR(VGPU10_OPCODE_LT);
431 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAD);
432 SVGA_CASE_ID2STR(VGPU10_OPCODE_MIN);
433 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAX);
434 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUSTOMDATA);
435 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOV);
436 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOVC);
437 SVGA_CASE_ID2STR(VGPU10_OPCODE_MUL);
438 SVGA_CASE_ID2STR(VGPU10_OPCODE_NE);
439 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOP);
440 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOT);
441 SVGA_CASE_ID2STR(VGPU10_OPCODE_OR);
442 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESINFO);
443 SVGA_CASE_ID2STR(VGPU10_OPCODE_RET);
444 SVGA_CASE_ID2STR(VGPU10_OPCODE_RETC);
445 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NE);
446 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NI);
447 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_PI);
448 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_Z);
449 SVGA_CASE_ID2STR(VGPU10_OPCODE_RSQ);
450 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE);
451 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C);
452 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C_LZ);
453 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_L);
454 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_D);
455 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_B);
456 SVGA_CASE_ID2STR(VGPU10_OPCODE_SQRT);
457 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWITCH);
458 SVGA_CASE_ID2STR(VGPU10_OPCODE_SINCOS);
459 SVGA_CASE_ID2STR(VGPU10_OPCODE_UDIV);
460 SVGA_CASE_ID2STR(VGPU10_OPCODE_ULT);
461 SVGA_CASE_ID2STR(VGPU10_OPCODE_UGE);
462 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMUL);
463 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAD);
464 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAX);
465 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMIN);
466 SVGA_CASE_ID2STR(VGPU10_OPCODE_USHR);
467 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOF);
468 SVGA_CASE_ID2STR(VGPU10_OPCODE_XOR);
469 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE);
470 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_CONSTANT_BUFFER);
471 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_SAMPLER);
472 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEX_RANGE);
473 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY);
474 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE);
475 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT);
476 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT);
477 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SGV);
478 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SIV);
479 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS);
480 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SGV);
481 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SIV);
482 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT);
483 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SGV);
484 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SIV);
485 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TEMPS);
486 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEXABLE_TEMP);
487 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GLOBAL_FLAGS);
488 SVGA_CASE_ID2STR(VGPU10_OPCODE_VMWARE);
489 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOD);
490 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4);
491 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_POS);
492 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_INFO);
493 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED1);
494 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_DECLS);
495 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_CONTROL_POINT_PHASE);
496 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_FORK_PHASE);
497 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_JOIN_PHASE);
498 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT_STREAM);
499 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT_STREAM);
500 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT_STREAM);
501 SVGA_CASE_ID2STR(VGPU10_OPCODE_INTERFACE_CALL);
502 SVGA_CASE_ID2STR(VGPU10_OPCODE_BUFINFO);
503 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_COARSE);
504 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_FINE);
505 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_COARSE);
506 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_FINE);
507 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_C);
508 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO);
509 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO_C);
510 SVGA_CASE_ID2STR(VGPU10_OPCODE_RCP);
511 SVGA_CASE_ID2STR(VGPU10_OPCODE_F32TOF16);
512 SVGA_CASE_ID2STR(VGPU10_OPCODE_F16TOF32);
513 SVGA_CASE_ID2STR(VGPU10_OPCODE_UADDC);
514 SVGA_CASE_ID2STR(VGPU10_OPCODE_USUBB);
515 SVGA_CASE_ID2STR(VGPU10_OPCODE_COUNTBITS);
516 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_HI);
517 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_LO);
518 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_SHI);
519 SVGA_CASE_ID2STR(VGPU10_OPCODE_UBFE);
520 SVGA_CASE_ID2STR(VGPU10_OPCODE_IBFE);
521 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFI);
522 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFREV);
523 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWAPC);
524 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_STREAM);
525 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_BODY);
526 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_TABLE);
527 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INTERFACE);
528 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT);
529 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT);
530 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_DOMAIN);
531 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_PARTITIONING);
532 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE);
533 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR);
534 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT);
535 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT);
536 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_THREAD_GROUP);
537 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_TYPED);
538 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_RAW);
539 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_STRUCTURED);
540 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_RAW);
541 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_STRUCTURED);
542 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_RAW);
543 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED);
544 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_UAV_TYPED);
545 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_UAV_TYPED);
546 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_RAW);
547 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_RAW);
548 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_STRUCTURED);
549 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_STRUCTURED);
550 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_AND);
551 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_OR);
552 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_XOR);
553 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_CMP_STORE);
554 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IADD);
555 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMAX);
556 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMIN);
557 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMAX);
558 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMIN);
559 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_ALLOC);
560 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CONSUME);
561 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IADD);
562 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_AND);
563 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_OR);
564 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_XOR);
565 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_EXCH);
566 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
567 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMAX);
568 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMIN);
569 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMAX);
570 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMIN);
571 SVGA_CASE_ID2STR(VGPU10_OPCODE_SYNC);
572 SVGA_CASE_ID2STR(VGPU10_OPCODE_DADD);
573 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMAX);
574 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMIN);
575 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMUL);
576 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEQ);
577 SVGA_CASE_ID2STR(VGPU10_OPCODE_DGE);
578 SVGA_CASE_ID2STR(VGPU10_OPCODE_DLT);
579 SVGA_CASE_ID2STR(VGPU10_OPCODE_DNE);
580 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOV);
581 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOVC);
582 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOF);
583 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOD);
584 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SNAPPED);
585 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SAMPLE_INDEX);
586 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_CENTROID);
587 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT);
588 SVGA_CASE_ID2STR(VGPU10_OPCODE_ABORT);
589 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEBUG_BREAK);
590 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED0);
591 SVGA_CASE_ID2STR(VGPU10_OPCODE_DDIV);
592 SVGA_CASE_ID2STR(VGPU10_OPCODE_DFMA);
593 SVGA_CASE_ID2STR(VGPU10_OPCODE_DRCP);
594 SVGA_CASE_ID2STR(VGPU10_OPCODE_MSAD);
595 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOI);
596 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOU);
597 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOD);
598 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOD);
599 SVGA_CASE_ID2STR(VGPU10_NUM_OPCODES);
600 }
601 return NULL;
602}
603
604
605static const char *dxbcShaderTypeToString(uint32_t value)
606{
607 VGPU10_PROGRAM_TYPE enm = (VGPU10_PROGRAM_TYPE)value;
608 switch (enm)
609 {
610 SVGA_CASE_ID2STR(VGPU10_PIXEL_SHADER);
611 SVGA_CASE_ID2STR(VGPU10_VERTEX_SHADER);
612 SVGA_CASE_ID2STR(VGPU10_GEOMETRY_SHADER);
613 SVGA_CASE_ID2STR(VGPU10_HULL_SHADER);
614 SVGA_CASE_ID2STR(VGPU10_DOMAIN_SHADER);
615 SVGA_CASE_ID2STR(VGPU10_COMPUTE_SHADER);
616 }
617 return NULL;
618}
619
620
621static const char *dxbcCustomDataClassToString(uint32_t value)
622{
623 VGPU10_CUSTOMDATA_CLASS enm = (VGPU10_CUSTOMDATA_CLASS)value;
624 switch (enm)
625 {
626 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_COMMENT);
627 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DEBUGINFO);
628 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_OPAQUE);
629 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER);
630 }
631 return NULL;
632}
633
634
635static const char *dxbcSystemNameToString(uint32_t value)
636{
637 VGPU10_SYSTEM_NAME enm = (VGPU10_SYSTEM_NAME)value;
638 switch (enm)
639 {
640 SVGA_CASE_ID2STR(VGPU10_NAME_UNDEFINED);
641 SVGA_CASE_ID2STR(VGPU10_NAME_POSITION);
642 SVGA_CASE_ID2STR(VGPU10_NAME_CLIP_DISTANCE);
643 SVGA_CASE_ID2STR(VGPU10_NAME_CULL_DISTANCE);
644 SVGA_CASE_ID2STR(VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX);
645 SVGA_CASE_ID2STR(VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
646 SVGA_CASE_ID2STR(VGPU10_NAME_VERTEX_ID);
647 SVGA_CASE_ID2STR(VGPU10_NAME_PRIMITIVE_ID);
648 SVGA_CASE_ID2STR(VGPU10_NAME_INSTANCE_ID);
649 SVGA_CASE_ID2STR(VGPU10_NAME_IS_FRONT_FACE);
650 SVGA_CASE_ID2STR(VGPU10_NAME_SAMPLE_INDEX);
651 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR);
652 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR);
653 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR);
654 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR);
655 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
656 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
657 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR);
658 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR);
659 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR);
660 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
661 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
662 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
663 }
664 return NULL;
665}
666
667
668static const char *dxbcOperandTypeToString(uint32_t value)
669{
670 VGPU10_OPERAND_TYPE enm = (VGPU10_OPERAND_TYPE)value;
671 switch (enm)
672 {
673 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_TEMP);
674 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT);
675 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT);
676 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INDEXABLE_TEMP);
677 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE32);
678 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE64);
679 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_SAMPLER);
680 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RESOURCE);
681 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CONSTANT_BUFFER);
682 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER);
683 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_LABEL);
684 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
685 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH);
686 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_NULL);
687 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RASTERIZER);
688 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK);
689 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_STREAM);
690 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_BODY);
691 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_TABLE);
692 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INTERFACE);
693 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_INPUT);
694 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_OUTPUT);
695 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID);
696 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID);
697 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID);
698 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT);
699 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT);
700 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
701 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT);
702 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THIS_POINTER);
703 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_UAV);
704 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
705 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID);
706 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID);
707 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
708 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK);
709 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED);
710 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID);
711 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL);
712 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL);
713 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CYCLE_COUNTER);
714 SVGA_CASE_ID2STR(VGPU10_NUM_OPERANDS);
715 }
716 return NULL;
717}
718
719
720static const char *dxbcOperandNumComponentsToString(uint32_t value)
721{
722 VGPU10_OPERAND_NUM_COMPONENTS enm = (VGPU10_OPERAND_NUM_COMPONENTS)value;
723 switch (enm)
724 {
725 SVGA_CASE_ID2STR(VGPU10_OPERAND_0_COMPONENT);
726 SVGA_CASE_ID2STR(VGPU10_OPERAND_1_COMPONENT);
727 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT);
728 SVGA_CASE_ID2STR(VGPU10_OPERAND_N_COMPONENT);
729 }
730 return NULL;
731}
732
733
734static const char *dxbcOperandComponentModeToString(uint32_t value)
735{
736 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE enm = (VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE)value;
737 switch (enm)
738 {
739 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
740 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE);
741 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE);
742 }
743 return NULL;
744}
745
746
747static const char *dxbcOperandComponentNameToString(uint32_t value)
748{
749 VGPU10_COMPONENT_NAME enm = (VGPU10_COMPONENT_NAME)value;
750 switch (enm)
751 {
752 SVGA_CASE_ID2STR(VGPU10_COMPONENT_X);
753 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Y);
754 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Z);
755 SVGA_CASE_ID2STR(VGPU10_COMPONENT_W);
756 }
757 return NULL;
758}
759
760
761static const char *dxbcOperandIndexDimensionToString(uint32_t value)
762{
763 VGPU10_OPERAND_INDEX_DIMENSION enm = (VGPU10_OPERAND_INDEX_DIMENSION)value;
764 switch (enm)
765 {
766 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_0D);
767 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_1D);
768 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_2D);
769 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_3D);
770 }
771 return NULL;
772}
773
774
775static const char *dxbcOperandIndexRepresentationToString(uint32_t value)
776{
777 VGPU10_OPERAND_INDEX_REPRESENTATION enm = (VGPU10_OPERAND_INDEX_REPRESENTATION)value;
778 switch (enm)
779 {
780 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32);
781 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64);
782 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_RELATIVE);
783 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
784 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE);
785 }
786 return NULL;
787}
788
789
790static const char *dxbcInterpolationModeToString(uint32_t value)
791{
792 VGPU10_INTERPOLATION_MODE enm = (VGPU10_INTERPOLATION_MODE)value;
793 switch (enm)
794 {
795 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_UNDEFINED);
796 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_CONSTANT);
797 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR);
798 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_CENTROID);
799 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE);
800 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
801 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_SAMPLE);
802 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
803 }
804 return NULL;
805}
806
807
808static const char *dxbcResourceDimensionToString(uint32_t value)
809{
810 VGPU10_RESOURCE_DIMENSION enm = (VGPU10_RESOURCE_DIMENSION)value;
811 switch (enm)
812 {
813 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_UNKNOWN);
814 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_BUFFER);
815 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1D);
816 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2D);
817 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS);
818 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE3D);
819 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
820 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY);
821 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY);
822 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY);
823 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
824 }
825 return NULL;
826}
827
828
829static const char *dxbcVmwareOpcodeTypeToString(uint32_t value)
830{
831 VGPU10_VMWARE_OPCODE_TYPE enm = (VGPU10_VMWARE_OPCODE_TYPE)value;
832 switch (enm)
833 {
834 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_IDIV);
835 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DFRC);
836 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DRSQ);
837 SVGA_CASE_ID2STR(VGPU10_VMWARE_NUM_OPCODES);
838 }
839 return NULL;
840}
841
842#endif /* LOG_ENABLED */
843
/*
 * MD5 from IPRT (alt-md5.cpp) for DXBC hash calculation.
 * The DXBC hash function uses a different padding for the data, see dxbcHash.
 * Therefore RTMd5Final is not needed. Two functions have been renamed:
 * dxbcRTMd5Update and dxbcRTMd5Init.
 */
849
850
/* The four core functions - F1 is optimized somewhat */
/* #define F1(x, y, z) (x & y | ~x & z) */
/* All macro arguments are parenthesized so that expression arguments expand safely. */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))


/* This is the central step in the MD5 algorithm. */
/* 'w' is modified in place; 's' is the left-rotate count (1..31 in all uses below). */
#define MD5STEP(f, w, x, y, z, data, s) \
    ( (w) += f(x, y, z) + (data), (w) = (w) << (s) | (w) >> (32 - (s)), (w) += (x) )


/**
 * The core of the MD5 algorithm, this alters an existing MD5 hash to reflect
 * the addition of 16 longwords of new data. The update function blocks the
 * data and converts bytes into longwords for this routine.
 *
 * @param   buf     The four 32-bit state words; updated in place.
 * @param   in      One 64-byte block as sixteen 32-bit words in host order.
 */
static void rtMd5Transform(uint32_t buf[4], uint32_t const in[16])
{
    uint32_t a, b, c, d;

    a = buf[0];
    b = buf[1];
    c = buf[2];
    d = buf[3];

    /*      fn, w, x, y, z, data,                 s) */
    MD5STEP(F1, a, b, c, d, in[ 0] + 0xd76aa478,  7);
    MD5STEP(F1, d, a, b, c, in[ 1] + 0xe8c7b756, 12);
    MD5STEP(F1, c, d, a, b, in[ 2] + 0x242070db, 17);
    MD5STEP(F1, b, c, d, a, in[ 3] + 0xc1bdceee, 22);
    MD5STEP(F1, a, b, c, d, in[ 4] + 0xf57c0faf,  7);
    MD5STEP(F1, d, a, b, c, in[ 5] + 0x4787c62a, 12);
    MD5STEP(F1, c, d, a, b, in[ 6] + 0xa8304613, 17);
    MD5STEP(F1, b, c, d, a, in[ 7] + 0xfd469501, 22);
    MD5STEP(F1, a, b, c, d, in[ 8] + 0x698098d8,  7);
    MD5STEP(F1, d, a, b, c, in[ 9] + 0x8b44f7af, 12);
    MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
    MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
    MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122,  7);
    MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
    MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
    MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);

    MD5STEP(F2, a, b, c, d, in[ 1] + 0xf61e2562,  5);
    MD5STEP(F2, d, a, b, c, in[ 6] + 0xc040b340,  9);
    MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
    MD5STEP(F2, b, c, d, a, in[ 0] + 0xe9b6c7aa, 20);
    MD5STEP(F2, a, b, c, d, in[ 5] + 0xd62f105d,  5);
    MD5STEP(F2, d, a, b, c, in[10] + 0x02441453,  9);
    MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
    MD5STEP(F2, b, c, d, a, in[ 4] + 0xe7d3fbc8, 20);
    MD5STEP(F2, a, b, c, d, in[ 9] + 0x21e1cde6,  5);
    MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6,  9);
    MD5STEP(F2, c, d, a, b, in[ 3] + 0xf4d50d87, 14);
    MD5STEP(F2, b, c, d, a, in[ 8] + 0x455a14ed, 20);
    MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905,  5);
    MD5STEP(F2, d, a, b, c, in[ 2] + 0xfcefa3f8,  9);
    MD5STEP(F2, c, d, a, b, in[ 7] + 0x676f02d9, 14);
    MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);

    MD5STEP(F3, a, b, c, d, in[ 5] + 0xfffa3942,  4);
    MD5STEP(F3, d, a, b, c, in[ 8] + 0x8771f681, 11);
    MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
    MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
    MD5STEP(F3, a, b, c, d, in[ 1] + 0xa4beea44,  4);
    MD5STEP(F3, d, a, b, c, in[ 4] + 0x4bdecfa9, 11);
    MD5STEP(F3, c, d, a, b, in[ 7] + 0xf6bb4b60, 16);
    MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
    MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6,  4);
    MD5STEP(F3, d, a, b, c, in[ 0] + 0xeaa127fa, 11);
    MD5STEP(F3, c, d, a, b, in[ 3] + 0xd4ef3085, 16);
    MD5STEP(F3, b, c, d, a, in[ 6] + 0x04881d05, 23);
    MD5STEP(F3, a, b, c, d, in[ 9] + 0xd9d4d039,  4);
    MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
    MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
    MD5STEP(F3, b, c, d, a, in[ 2] + 0xc4ac5665, 23);

    MD5STEP(F4, a, b, c, d, in[ 0] + 0xf4292244,  6);
    MD5STEP(F4, d, a, b, c, in[ 7] + 0x432aff97, 10);
    MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
    MD5STEP(F4, b, c, d, a, in[ 5] + 0xfc93a039, 21);
    MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3,  6);
    MD5STEP(F4, d, a, b, c, in[ 3] + 0x8f0ccc92, 10);
    MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
    MD5STEP(F4, b, c, d, a, in[ 1] + 0x85845dd1, 21);
    MD5STEP(F4, a, b, c, d, in[ 8] + 0x6fa87e4f,  6);
    MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
    MD5STEP(F4, c, d, a, b, in[ 6] + 0xa3014314, 15);
    MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
    MD5STEP(F4, a, b, c, d, in[ 4] + 0xf7537e82,  6);
    MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
    MD5STEP(F4, c, d, a, b, in[ 2] + 0x2ad7d2bb, 15);
    MD5STEP(F4, b, c, d, a, in[ 9] + 0xeb86d391, 21);

    /* Fold the result of this block into the running state. */
    buf[0] += a;
    buf[1] += b;
    buf[2] += c;
    buf[3] += d;
}
952
953
#ifdef RT_BIG_ENDIAN
/**
 * Converts an array of little-endian 32-bit words to host byte order in place.
 *
 * Only compiled for big-endian hosts; on little-endian hosts the macro below
 * makes it a no-op.
 *
 * @param   buf     The words to convert.
 * @param   longs   Number of 32-bit words in @a buf. Zero is handled (the
 *                  previous do/while form would have wrapped the counter).
 */
static void rtMd5ByteReverse(uint32_t *buf, unsigned int longs)
{
    for (unsigned int i = 0; i < longs; ++i)
    {
        uint32_t const t = buf[i];
        buf[i] = RT_LE2H_U32(t);
    }
}
#else /* little endian - do nothing */
# define rtMd5ByteReverse(buf, len) do { /* Nothing */ } while (0)
#endif
972
973
974/*
975 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
976 * initialization constants.
977 */
978static void dxbcRTMd5Init(PRTMD5CONTEXT pCtx)
979{
980 pCtx->AltPrivate.buf[0] = 0x67452301;
981 pCtx->AltPrivate.buf[1] = 0xefcdab89;
982 pCtx->AltPrivate.buf[2] = 0x98badcfe;
983 pCtx->AltPrivate.buf[3] = 0x10325476;
984
985 pCtx->AltPrivate.bits[0] = 0;
986 pCtx->AltPrivate.bits[1] = 0;
987}
988
989
990/*
991 * Update context to reflect the concatenation of another buffer full
992 * of bytes.
993 */
994/** @todo Optimize this, because len is always a multiple of 64. */
995static void dxbcRTMd5Update(PRTMD5CONTEXT pCtx, const void *pvBuf, size_t len)
996{
997 const uint8_t *buf = (const uint8_t *)pvBuf;
998 uint32_t t;
999
1000 /* Update bitcount */
1001 t = pCtx->AltPrivate.bits[0];
1002 if ((pCtx->AltPrivate.bits[0] = t + ((uint32_t) len << 3)) < t)
1003 pCtx->AltPrivate.bits[1]++; /* Carry from low to high */
1004 pCtx->AltPrivate.bits[1] += (uint32_t)(len >> 29);
1005
1006 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
1007
1008 /* Handle any leading odd-sized chunks */
1009 if (t)
1010 {
1011 uint8_t *p = (uint8_t *) pCtx->AltPrivate.in + t;
1012
1013 t = 64 - t;
1014 if (len < t)
1015 {
1016 memcpy(p, buf, len);
1017 return;
1018 }
1019 memcpy(p, buf, t);
1020 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1021 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1022 buf += t;
1023 len -= t;
1024 }
1025
1026 /* Process data in 64-byte chunks */
1027#ifndef RT_BIG_ENDIAN
1028 if (!((uintptr_t)buf & 0x3))
1029 {
1030 while (len >= 64) {
1031 rtMd5Transform(pCtx->AltPrivate.buf, (uint32_t const *)buf);
1032 buf += 64;
1033 len -= 64;
1034 }
1035 }
1036 else
1037#endif
1038 {
1039 while (len >= 64) {
1040 memcpy(pCtx->AltPrivate.in, buf, 64);
1041 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1042 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1043 buf += 64;
1044 len -= 64;
1045 }
1046 }
1047
1048 /* Handle any remaining bytes of data */
1049 memcpy(pCtx->AltPrivate.in, buf, len);
1050}
1051
1052
1053static void dxbcHash(void const *pvData, uint32_t cbData, uint8_t pabDigest[RTMD5HASHSIZE])
1054{
1055 size_t const kBlockSize = 64;
1056 uint8_t au8BlockBuffer[kBlockSize];
1057
1058 static uint8_t const s_au8Padding[kBlockSize] =
1059 {
1060 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1061 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1062 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1063 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1064 };
1065
1066 RTMD5CONTEXT Ctx;
1067 PRTMD5CONTEXT const pCtx = &Ctx;
1068 dxbcRTMd5Init(pCtx);
1069
1070 uint8_t const *pu8Data = (uint8_t *)pvData;
1071 size_t cbRemaining = cbData;
1072
1073 size_t const cbCompleteBlocks = cbData & ~ (kBlockSize - 1);
1074 dxbcRTMd5Update(pCtx, pu8Data, cbCompleteBlocks);
1075 pu8Data += cbCompleteBlocks;
1076 cbRemaining -= cbCompleteBlocks;
1077
1078 /* Custom padding. */
1079 if (cbRemaining >= kBlockSize - 2 * sizeof(uint32_t))
1080 {
1081 /* Two additional blocks. */
1082 memcpy(&au8BlockBuffer[0], pu8Data, cbRemaining);
1083 memcpy(&au8BlockBuffer[cbRemaining], s_au8Padding, kBlockSize - cbRemaining);
1084 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1085
1086 memset(&au8BlockBuffer[sizeof(uint32_t)], 0, kBlockSize - 2 * sizeof(uint32_t));
1087 }
1088 else
1089 {
1090 /* One additional block. */
1091 memcpy(&au8BlockBuffer[sizeof(uint32_t)], pu8Data, cbRemaining);
1092 memcpy(&au8BlockBuffer[sizeof(uint32_t) + cbRemaining], s_au8Padding, kBlockSize - cbRemaining - 2 * sizeof(uint32_t));
1093 }
1094
1095 /* Set the first and last dwords of the last block. */
1096 *(uint32_t *)&au8BlockBuffer[0] = cbData << 3;
1097 *(uint32_t *)&au8BlockBuffer[kBlockSize - sizeof(uint32_t)] = (cbData << 1) | 1;
1098 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1099
1100 AssertCompile(sizeof(pCtx->AltPrivate.buf) == RTMD5HASHSIZE);
1101 memcpy(pabDigest, pCtx->AltPrivate.buf, RTMD5HASHSIZE);
1102}
1103
1104
1105/*
1106 *
1107 * Shader token reader.
1108 *
1109 */
1110
/* Sequential reader over an array of 32-bit shader tokens.
 * pToken and cRemainingToken are advanced together by the dxbcTokenReaderSkip/Read32 helpers. */
typedef struct DXBCTokenReader
{
    uint32_t const *pToken; /* Next token to read. */
    uint32_t cToken; /* How many tokens total. */
    uint32_t cRemainingToken; /* How many tokens remain. */
} DXBCTokenReader;
1117
1118
#ifdef LOG_ENABLED
/* Returns the byte offset of the next token to read, counted from the first token. Logging only. */
DECLINLINE(uint32_t) dxbcTokenReaderByteOffset(DXBCTokenReader *r)
{
    uint32_t const cTokensConsumed = r->cToken - r->cRemainingToken;
    return cTokensConsumed * 4;
}
#endif
1125
1126
#if 0 // Unused for now
/* Returns the number of tokens which have not been read yet. */
DECLINLINE(uint32_t) dxbcTokenReaderRemaining(DXBCTokenReader *r)
{
    return r->cRemainingToken;
}
#endif
1133
1134
1135DECLINLINE(uint32_t const *) dxbcTokenReaderPtr(DXBCTokenReader *r)
1136{
1137 return r->pToken;
1138}
1139
1140
1141DECLINLINE(bool) dxbcTokenReaderCanRead(DXBCTokenReader *r, uint32_t cToken)
1142{
1143 return cToken <= r->cRemainingToken;
1144}
1145
1146
1147DECLINLINE(void) dxbcTokenReaderSkip(DXBCTokenReader *r, uint32_t cToken)
1148{
1149 AssertReturnVoid(r->cRemainingToken >= cToken);
1150 r->cRemainingToken -= cToken;
1151 r->pToken += cToken;
1152}
1153
1154
1155DECLINLINE(uint32_t) dxbcTokenReaderRead32(DXBCTokenReader *r)
1156{
1157 AssertReturn(r->cRemainingToken, 0);
1158 --r->cRemainingToken;
1159 return *(r->pToken++);
1160}
1161
1162
1163DECLINLINE(uint64_t) dxbcTokenReaderRead64(DXBCTokenReader *r)
1164{
1165 uint64_t const u64Low = dxbcTokenReaderRead32(r);
1166 uint64_t const u64High = dxbcTokenReaderRead32(r);
1167 return u64Low + (u64High << 32);
1168}
1169
1170
1171/*
1172 *
1173 * Byte writer.
1174 *
1175 */
1176
/* Growable output buffer. The buffer is (re)allocated by dxbcByteWriterCanWrite,
 * which also records failures in 'rc'. */
typedef struct DXBCByteWriter
{
    uint8_t *pu8ByteCodeBegin; /* First byte of the buffer. */
    uint8_t *pu8ByteCodePtr; /* Next free byte. */
    uint32_t cbAllocated; /* How many bytes allocated in the buffer. */
    uint32_t cbRemaining; /* How many bytes remain in the buffer. */
    int32_t rc; /* Status: set to VERR_INVALID_PARAMETER or VERR_NO_MEMORY when growing the buffer fails. */
} DXBCByteWriter;
1185
1186
1187DECLINLINE(void *) dxbcByteWriterPtr(DXBCByteWriter *w)
1188{
1189 return w->pu8ByteCodePtr;
1190}
1191
1192
1193DECLINLINE(uint32_t) dxbcByteWriterSize(DXBCByteWriter *w)
1194{
1195 return (uint32_t)(w->pu8ByteCodePtr - w->pu8ByteCodeBegin);
1196}
1197
1198
1199DECLINLINE(void) dxbcByteWriterCommit(DXBCByteWriter *w, uint32_t cbCommit)
1200{
1201 Assert(cbCommit < w->cbRemaining);
1202 cbCommit = RT_MIN(cbCommit, w->cbRemaining);
1203 w->pu8ByteCodePtr += cbCommit;
1204 w->cbRemaining -= cbCommit;
1205}
1206
1207
1208DECLINLINE(bool) dxbcByteWriterCanWrite(DXBCByteWriter *w, uint32_t cbMore)
1209{
1210 if (cbMore <= w->cbRemaining)
1211 return true;
1212
1213 /* Do not allow to allocate more than 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES */
1214 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1215 AssertReturnStmt(cbMore < cbMax && RT_ALIGN_32(cbMore, 4096) <= cbMax - w->cbAllocated, w->rc = VERR_INVALID_PARAMETER, false);
1216
1217 uint32_t cbNew = w->cbAllocated + RT_ALIGN_32(cbMore, 4096);
1218 void *pvNew = RTMemAllocZ(cbNew);
1219 if (!pvNew)
1220 {
1221 w->rc = VERR_NO_MEMORY;
1222 return false;
1223 }
1224
1225 uint32_t const cbCurrent = dxbcByteWriterSize(w);
1226 memcpy(pvNew, w->pu8ByteCodeBegin, cbCurrent);
1227 RTMemFree(w->pu8ByteCodeBegin);
1228
1229 w->pu8ByteCodeBegin = (uint8_t *)pvNew;
1230 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + cbCurrent;
1231 w->cbAllocated = cbNew;
1232 w->cbRemaining = cbNew - cbCurrent;
1233
1234 return true;
1235}
1236
1237
1238DECLINLINE(bool) dxbcByteWriterAddTokens(DXBCByteWriter *w, uint32_t const *paToken, uint32_t cToken)
1239{
1240 uint32_t const cbWrite = cToken * sizeof(uint32_t);
1241 if (dxbcByteWriterCanWrite(w, cbWrite))
1242 {
1243 memcpy(dxbcByteWriterPtr(w), paToken, cbWrite);
1244 dxbcByteWriterCommit(w, cbWrite);
1245 return true;
1246 }
1247
1248 AssertFailed();
1249 return false;
1250}
1251
1252
1253DECLINLINE(bool) dxbcByteWriterInit(DXBCByteWriter *w, uint32_t cbInitial)
1254{
1255 RT_ZERO(*w);
1256 return dxbcByteWriterCanWrite(w, cbInitial);
1257}
1258
1259
1260DECLINLINE(void) dxbcByteWriterReset(DXBCByteWriter *w)
1261{
1262 RTMemFree(w->pu8ByteCodeBegin);
1263 RT_ZERO(*w);
1264}
1265
1266
1267DECLINLINE(void) dxbcByteWriterFetchData(DXBCByteWriter *w, void **ppv, uint32_t *pcb)
1268{
1269 *ppv = w->pu8ByteCodeBegin;
1270 *pcb = dxbcByteWriterSize(w);
1271
1272 w->pu8ByteCodeBegin = NULL;
1273 dxbcByteWriterReset(w);
1274}
1275
1276
1277/*
1278 *
1279 * VGPU10 shader parser.
1280 *
1281 */
1282
1283/* Parse an instruction operand. */
1284static int dxbcParseOperand(DXBCTokenReader *r, VGPUOperand *paOperand, uint32_t *pcOperandRemain)
1285{
1286 ASSERT_GUEST_RETURN(*pcOperandRemain > 0, VERR_NOT_SUPPORTED);
1287
1288 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1289
1290 VGPU10OperandToken0 operand0;
1291 operand0.value = dxbcTokenReaderRead32(r);
1292
1293 Log6((" %s(%d) %s(%d) %s(%d) %s(%d)\n",
1294 dxbcOperandNumComponentsToString(operand0.numComponents), operand0.numComponents,
1295 dxbcOperandComponentModeToString(operand0.selectionMode), operand0.selectionMode,
1296 dxbcOperandTypeToString(operand0.operandType), operand0.operandType,
1297 dxbcOperandIndexDimensionToString(operand0.indexDimension), operand0.indexDimension));
1298
1299 ASSERT_GUEST_RETURN(operand0.numComponents <= VGPU10_OPERAND_4_COMPONENT, VERR_INVALID_PARAMETER);
1300 if ( operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32
1301 && operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE64)
1302 {
1303 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1304 {
1305 ASSERT_GUEST_RETURN(operand0.selectionMode <= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE, VERR_INVALID_PARAMETER);
1306 switch (operand0.selectionMode)
1307 {
1308 case VGPU10_OPERAND_4_COMPONENT_MASK_MODE:
1309 Log6((" Mask %#x\n", operand0.mask));
1310 break;
1311 case VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE:
1312 Log6((" Swizzle %s(%d) %s(%d) %s(%d) %s(%d)\n",
1313 dxbcOperandComponentNameToString(operand0.swizzleX), operand0.swizzleX,
1314 dxbcOperandComponentNameToString(operand0.swizzleY), operand0.swizzleY,
1315 dxbcOperandComponentNameToString(operand0.swizzleZ), operand0.swizzleZ,
1316 dxbcOperandComponentNameToString(operand0.swizzleW), operand0.swizzleW));
1317 break;
1318 case VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE:
1319 Log6((" Select %s(%d)\n",
1320 dxbcOperandComponentNameToString(operand0.selectMask), operand0.selectMask));
1321 break;
1322 default: /* Never happens. */
1323 break;
1324 }
1325 }
1326 }
1327
1328 if (operand0.extended)
1329 {
1330 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1331
1332 VGPU10OperandToken1 operand1;
1333 operand1.value = dxbcTokenReaderRead32(r);
1334 }
1335
1336 ASSERT_GUEST_RETURN(operand0.operandType < VGPU10_NUM_OPERANDS, VERR_INVALID_PARAMETER);
1337
1338 if ( operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32
1339 || operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE64)
1340 {
1341 uint32_t cComponent = 0;
1342 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1343 cComponent = 4;
1344 else if (operand0.numComponents == VGPU10_OPERAND_1_COMPONENT)
1345 cComponent = 1;
1346
1347 for (uint32_t i = 0; i < cComponent; ++i)
1348 {
1349 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1350 paOperand->aImm[i] = dxbcTokenReaderRead32(r);
1351 }
1352 }
1353
1354 paOperand->numComponents = operand0.numComponents;
1355 paOperand->selectionMode = operand0.selectionMode;
1356 paOperand->mask = operand0.mask;
1357 paOperand->operandType = operand0.operandType;
1358 paOperand->indexDimension = operand0.indexDimension;
1359
1360 int rc = VINF_SUCCESS;
1361 /* 'indexDimension' tells the number of indices. 'i' is the array index, i.e. i = 0 for 1D, etc. */
1362 for (uint32_t i = 0; i < operand0.indexDimension; ++i)
1363 {
1364 if (i == 0) /* VGPU10_OPERAND_INDEX_1D */
1365 paOperand->aOperandIndex[i].indexRepresentation = operand0.index0Representation;
1366 else if (i == 1) /* VGPU10_OPERAND_INDEX_2D */
1367 paOperand->aOperandIndex[i].indexRepresentation = operand0.index1Representation;
1368 else /* VGPU10_OPERAND_INDEX_3D */
1369 continue; /* Skip because it is "rarely if ever used" and is not supported by VGPU10. */
1370
1371 uint32_t const indexRepresentation = paOperand->aOperandIndex[i].indexRepresentation;
1372 switch (indexRepresentation)
1373 {
1374 case VGPU10_OPERAND_INDEX_IMMEDIATE32:
1375 {
1376 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1377 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1378 break;
1379 }
1380 case VGPU10_OPERAND_INDEX_IMMEDIATE64:
1381 {
1382 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1383 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1384 break;
1385 }
1386 case VGPU10_OPERAND_INDEX_RELATIVE:
1387 {
1388 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1389 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1390 Log6((" [operand index %d] parsing relative\n", i));
1391 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1392 break;
1393 }
1394 case VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1395 {
1396 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1397 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1398 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1399 Log6((" [operand index %d] parsing relative\n", i));
1400 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1401 break;
1402 }
1403 case VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE:
1404 {
1405 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1406 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1407 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1408 Log6((" [operand index %d] parsing relative\n", i));
1409 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1410 break;
1411 }
1412 default:
1413 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1414 }
1415 Log6((" [operand index %d] %s(%d): %#llx%s\n",
1416 i, dxbcOperandIndexRepresentationToString(indexRepresentation), indexRepresentation,
1417 paOperand->aOperandIndex[i].iOperandImmediate, paOperand->aOperandIndex[i].pOperandRelative ? " + relative" : ""));
1418 if (RT_FAILURE(rc))
1419 break;
1420 }
1421
1422 *pcOperandRemain -= 1;
1423 return VINF_SUCCESS;
1424}
1425
1426
1427/* Parse an instruction. */
1428static int dxbcParseOpcode(DXBCTokenReader *r, VGPUOpcode *pOpcode)
1429{
1430 RT_ZERO(*pOpcode);
1431 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1432
1433 pOpcode->paOpcodeToken = dxbcTokenReaderPtr(r);
1434
1435 VGPU10OpcodeToken0 opcode;
1436 opcode.value = dxbcTokenReaderRead32(r);
1437
1438 pOpcode->opcodeType = opcode.opcodeType;
1439 ASSERT_GUEST_RETURN(pOpcode->opcodeType < VGPU10_NUM_OPCODES, VERR_INVALID_PARAMETER);
1440
1441 uint32_t const cOperand = g_aOpcodeInfo[pOpcode->opcodeType].cOperand;
1442 if (cOperand != UINT32_MAX)
1443 {
1444#ifdef LOG_ENABLED
1445 if (pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1446 Log6(("[%#x] %s length %d %s\n",
1447 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), opcode.instructionLength, dxbcResourceDimensionToString(opcode.resourceDimension)));
1448 else
1449 Log6(("[%#x] %s length %d %s\n",
1450 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), opcode.instructionLength, dxbcInterpolationModeToString(opcode.interpolationMode)));
1451#endif
1452
1453 ASSERT_GUEST_RETURN(cOperand < RT_ELEMENTS(pOpcode->aIdxOperand), VERR_INVALID_PARAMETER);
1454
1455 pOpcode->cOpcodeToken = opcode.instructionLength;
1456 if (opcode.extended)
1457 {
1458 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1459 if ( pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_BODY
1460 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_TABLE
1461 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_INTERFACE
1462 || pOpcode->opcodeType == VGPU10_OPCODE_INTERFACE_CALL
1463 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_THREAD_GROUP)
1464 {
1465 /* "next DWORD contains ... the actual instruction length in DWORD since it may not fit into 7 bits" */
1466 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1467 }
1468 else
1469 AssertFailedReturn(VERR_NOT_IMPLEMENTED); /** @todo Anything else special for extended opcodes. */
1470 }
1471
1472 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken >= 1 && pOpcode->cOpcodeToken < 256, VERR_INVALID_PARAMETER);
1473 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 1), VERR_INVALID_PARAMETER);
1474
1475 /* Additional tokens before operands. */
1476 switch (pOpcode->opcodeType)
1477 {
1478 case VGPU10_OPCODE_INTERFACE_CALL:
1479 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1480 dxbcTokenReaderSkip(r, 1); /* Function index */
1481 break;
1482
1483 default:
1484 break;
1485 }
1486
1487 /* Operands. */
1488 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1489 for (uint32_t i = 0; i < cOperand; ++i)
1490 {
1491 Log6((" [operand %d]\n", i));
1492 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1493 pOpcode->aIdxOperand[i] = idxOperand;
1494 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1495 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1496 }
1497
1498 pOpcode->cOperand = cOperand;
1499
1500 /* Additional tokens after operands. */
1501 switch (pOpcode->opcodeType)
1502 {
1503 case VGPU10_OPCODE_DCL_INPUT_SIV:
1504 case VGPU10_OPCODE_DCL_INPUT_SGV:
1505 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
1506 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
1507 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1508 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1509 {
1510 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1511
1512 VGPU10NameToken name;
1513 name.value = dxbcTokenReaderRead32(r);
1514 Log6((" %s(%d)\n",
1515 dxbcSystemNameToString(name.name), name.name));
1516 pOpcode->semanticName = name.name;
1517 break;
1518 }
1519 case VGPU10_OPCODE_DCL_RESOURCE:
1520 {
1521 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1522 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1523 break;
1524 }
1525 case VGPU10_OPCODE_DCL_TEMPS:
1526 {
1527 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1528 dxbcTokenReaderSkip(r, 1); /* number of temps */
1529 break;
1530 }
1531 case VGPU10_OPCODE_DCL_INDEXABLE_TEMP:
1532 {
1533 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1534 dxbcTokenReaderSkip(r, 3); /* register index; number of registers; number of components */
1535 break;
1536 }
1537 case VGPU10_OPCODE_DCL_INDEX_RANGE:
1538 {
1539 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1540 dxbcTokenReaderSkip(r, 1); /* count of registers */
1541 break;
1542 }
1543 case VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1544 {
1545 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1546 dxbcTokenReaderSkip(r, 1); /* maximum number of primitives */
1547 break;
1548 }
1549 case VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT:
1550 {
1551 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1552 dxbcTokenReaderSkip(r, 1); /* number of instances */
1553 break;
1554 }
1555 case VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR:
1556 {
1557 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1558 dxbcTokenReaderSkip(r, 1); /* maximum TessFactor */
1559 break;
1560 }
1561 case VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1562 case VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1563 {
1564 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1565 dxbcTokenReaderSkip(r, 1); /* number of instances of the current fork/join phase program to execute */
1566 break;
1567 }
1568 case VGPU10_OPCODE_DCL_THREAD_GROUP:
1569 {
1570 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1571 dxbcTokenReaderSkip(r, 3); /* Thread Group dimensions as UINT32: x, y, z */
1572 break;
1573 }
1574 case VGPU10_OPCODE_DCL_UAV_TYPED:
1575 {
1576 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1577 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1578 break;
1579 }
1580 case VGPU10_OPCODE_DCL_UAV_STRUCTURED:
1581 {
1582 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1583 dxbcTokenReaderSkip(r, 1); /* byte stride */
1584 break;
1585 }
1586 case VGPU10_OPCODE_DCL_TGSM_RAW:
1587 {
1588 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1589 dxbcTokenReaderSkip(r, 1); /* element count */
1590 break;
1591 }
1592 case VGPU10_OPCODE_DCL_TGSM_STRUCTURED:
1593 {
1594 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1595 dxbcTokenReaderSkip(r, 2); /* struct byte stride; struct count */
1596 break;
1597 }
1598 case VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED:
1599 {
1600 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1601 dxbcTokenReaderSkip(r, 1); /* struct byte stride */
1602 break;
1603 }
1604 default:
1605 break;
1606 }
1607 }
1608 else
1609 {
1610 /* Special opcodes. */
1611 if (pOpcode->opcodeType == VGPU10_OPCODE_CUSTOMDATA)
1612 {
1613 Log6(("[%#x] %s %s\n",
1614 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), dxbcCustomDataClassToString(opcode.customDataClass)));
1615
1616 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1617 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1618
1619 if (pOpcode->cOpcodeToken < 2)
1620 pOpcode->cOpcodeToken = 2;
1621 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 2), VERR_INVALID_PARAMETER);
1622
1623 dxbcTokenReaderSkip(r, pOpcode->cOpcodeToken - 2);
1624 }
1625 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1626 {
1627 Log6(("[%#x] %s %s(%d)\n",
1628 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), dxbcVmwareOpcodeTypeToString(opcode.vmwareOpcodeType), opcode.vmwareOpcodeType));
1629
1630 /** @todo implement */
1631 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1632 }
1633 else
1634 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1635
1636 // pOpcode->cOperand = 0;
1637 }
1638
1639 return VINF_SUCCESS;
1640}
1641
1642
/* State for writing the parsed shader back out, see dxbcOutputInit/dxbcOutputOpcode. */
typedef struct DXBCOUTPUTCTX
{
    VGPU10ProgramToken programToken; /* Copy of the shader's program token, set by dxbcOutputInit. */
    bool fEmulateOpcodeVmware; /* NOTE(review): not referenced in the visible code; presumably requests emulation of VGPU10_OPCODE_VMWARE - confirm. */
} DXBCOUTPUTCTX;
1648
1649
1650void dxbcOutputInit(DXBCOUTPUTCTX *pOutctx, VGPU10ProgramToken const *pProgramToken, uint32_t cToken)
1651{
1652 RT_NOREF(pProgramToken, cToken);
1653 RT_ZERO(*pOutctx);
1654 pOutctx->programToken = *pProgramToken;
1655}
1656
1657
1658int dxbcOutputOpcode(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1659{
1660 if ( pOutctx->programToken.programType == VGPU10_PIXEL_SHADER
1661 && pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1662 {
1663 /** @todo This is a workaround. */
1664 /* Sometimes the guest (Mesa) created a shader with uninitialized resource dimension.
1665 * Use texture 2d because buffer is not what a pixel shader normally uses.
1666 */
1667 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken == 4, VERR_INVALID_PARAMETER);
1668
1669 VGPU10OpcodeToken0 opcode;
1670 opcode.value = pOpcode->paOpcodeToken[0];
1671 if (opcode.resourceDimension == VGPU10_RESOURCE_DIMENSION_BUFFER)
1672 {
1673 opcode.resourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
1674 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1675 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], 2);
1676 uint32_t const returnType = 0x5555; /* float */
1677 dxbcByteWriterAddTokens(w, &returnType, 1);
1678 return VINF_SUCCESS;
1679 }
1680 }
1681
1682 dxbcByteWriterAddTokens(w, pOpcode->paOpcodeToken, pOpcode->cOpcodeToken);
1683 return VINF_SUCCESS;
1684}
1685
1686
1687int dxbcOutputFinalize(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w)
1688{
1689 RT_NOREF(pOutctx, w);
1690 return VINF_SUCCESS;
1691}
1692
1693
1694/*
1695 * Parse and verify the shader byte code. Extract input and output signatures into pInfo.
1696 */
1697int DXShaderParse(void const *pvShaderCode, uint32_t cbShaderCode, DXShaderInfo *pInfo)
1698{
1699 if (pInfo)
1700 RT_ZERO(*pInfo);
1701
1702 ASSERT_GUEST_RETURN(cbShaderCode <= SVGA3D_MAX_SHADER_MEMORY_BYTES, VERR_INVALID_PARAMETER);
1703 ASSERT_GUEST_RETURN((cbShaderCode & 0x3) == 0, VERR_INVALID_PARAMETER); /* Aligned to the token size. */
1704 ASSERT_GUEST_RETURN(cbShaderCode >= 8, VERR_INVALID_PARAMETER); /* At least program and length tokens. */
1705
1706 uint32_t const *paToken = (uint32_t *)pvShaderCode;
1707
1708 VGPU10ProgramToken const *pProgramToken = (VGPU10ProgramToken *)&paToken[0];
1709 ASSERT_GUEST_RETURN( pProgramToken->majorVersion >= 4
1710 && pProgramToken->programType <= VGPU10_COMPUTE_SHADER, VERR_INVALID_PARAMETER);
1711 if (pInfo)
1712 pInfo->enmProgramType = (VGPU10_PROGRAM_TYPE)pProgramToken->programType;
1713
1714 uint32_t const cToken = paToken[1];
1715 Log6(("Shader version %d.%d type %s(%d) Length %d\n",
1716 pProgramToken->majorVersion, pProgramToken->minorVersion, dxbcShaderTypeToString(pProgramToken->programType), pProgramToken->programType, cToken));
1717 ASSERT_GUEST_RETURN(cbShaderCode / 4 == cToken, VERR_INVALID_PARAMETER); /* Declared length should be equal to the actual. */
1718
1719 /* Write the parsed (and possibly modified) shader to a memory buffer. */
1720 DXBCByteWriter dxbcByteWriter;
1721 DXBCByteWriter *w = &dxbcByteWriter;
1722 if (!dxbcByteWriterInit(w, 4096 + cbShaderCode))
1723 return VERR_NO_MEMORY;
1724
1725 dxbcByteWriterAddTokens(w, paToken, 2);
1726
1727 DXBCTokenReader parser;
1728 RT_ZERO(parser);
1729
1730 DXBCTokenReader *r = &parser;
1731 r->pToken = &paToken[2];
1732 r->cToken = r->cRemainingToken = cToken - 2;
1733
1734 DXBCOUTPUTCTX outctx;
1735 dxbcOutputInit(&outctx, pProgramToken, cToken);
1736
1737 int rc = VINF_SUCCESS;
1738 while (dxbcTokenReaderCanRead(r, 1))
1739 {
1740 VGPUOpcode opcode;
1741 rc = dxbcParseOpcode(r, &opcode);
1742 ASSERT_GUEST_STMT_BREAK(RT_SUCCESS(rc), rc = VERR_INVALID_PARAMETER);
1743
1744 rc = dxbcOutputOpcode(&outctx, w, &opcode);
1745 AssertRCBreak(rc);
1746
1747 if (pInfo)
1748 {
1749 /* Fetch signatures. */
1750 SVGA3dDXSignatureEntry *pSignatureEntry = NULL;
1751 switch (opcode.opcodeType)
1752 {
1753 case VGPU10_OPCODE_DCL_INPUT:
1754 case VGPU10_OPCODE_DCL_INPUT_PS:
1755 case VGPU10_OPCODE_DCL_INPUT_SIV:
1756 ASSERT_GUEST_STMT_BREAK(pInfo->cInputSignature < RT_ELEMENTS(pInfo->aInputSignature), rc = VERR_INVALID_PARAMETER);
1757 pSignatureEntry = &pInfo->aInputSignature[pInfo->cInputSignature++];
1758 break;
1759 case VGPU10_OPCODE_DCL_OUTPUT:
1760 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1761 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1762 ASSERT_GUEST_STMT_BREAK(pInfo->cOutputSignature < RT_ELEMENTS(pInfo->aOutputSignature), rc = VERR_INVALID_PARAMETER);
1763 pSignatureEntry = &pInfo->aOutputSignature[pInfo->cOutputSignature++];
1764 break;
1765 default:
1766 break;
1767 }
1768
1769 if (RT_FAILURE(rc))
1770 break;
1771
1772 if (pSignatureEntry)
1773 {
1774 ASSERT_GUEST_STMT_BREAK( opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32
1775 || opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE64,
1776 rc = VERR_NOT_SUPPORTED);
1777
1778 uint32_t const indexDimension = opcode.aValOperand[0].indexDimension;
1779 if (indexDimension == VGPU10_OPERAND_INDEX_0D)
1780 {
1781 if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID)
1782 {
1783 pSignatureEntry->registerIndex = 0;
1784 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID;
1785 }
1786 else if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH)
1787 {
1788 /* oDepth is always last in the signature. Register index is equal to 0xFFFFFFFF. */
1789 pSignatureEntry->registerIndex = 0xFFFFFFFF;
1790 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
1791 }
1792 else
1793 ASSERT_GUEST_FAILED_STMT_BREAK(rc = VERR_NOT_SUPPORTED);
1794 }
1795 else
1796 {
1797 ASSERT_GUEST_STMT_BREAK( indexDimension == VGPU10_OPERAND_INDEX_1D
1798 || indexDimension == VGPU10_OPERAND_INDEX_2D
1799 || indexDimension == VGPU10_OPERAND_INDEX_3D,
1800 rc = VERR_NOT_SUPPORTED);
1801 /* The register index seems to be in the highest dimension. */
1802 pSignatureEntry->registerIndex = opcode.aValOperand[0].aOperandIndex[indexDimension - VGPU10_OPERAND_INDEX_1D].iOperandImmediate;
1803 pSignatureEntry->semanticName = opcode.semanticName;
1804 }
1805 pSignatureEntry->mask = opcode.aValOperand[0].mask;
1806 pSignatureEntry->componentType = SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN; /// @todo Proper value? Seems that it is not important.
1807 pSignatureEntry->minPrecision = SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT;
1808 }
1809 }
1810 }
1811
1812 if (RT_FAILURE(rc))
1813 {
1814 return rc;
1815 }
1816
1817 rc = dxbcOutputFinalize(&outctx, w);
1818 if (RT_FAILURE(rc))
1819 {
1820 return rc;
1821 }
1822
1823 dxbcByteWriterFetchData(w, &pInfo->pvBytecode, &pInfo->cbBytecode);
1824
1825#ifdef LOG_ENABLED
1826 if (pInfo->cInputSignature)
1827 {
1828 Log6(("Input signatures:\n"));
1829 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
1830 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask));
1831 }
1832 if (pInfo->cOutputSignature)
1833 {
1834 Log6(("Output signatures:\n"));
1835 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
1836 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask));
1837 }
1838 if (pInfo->cPatchConstantSignature)
1839 {
1840 Log6(("Patch constant signatures:\n"));
1841 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
1842 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask));
1843 }
1844#endif
1845
1846 return VINF_SUCCESS;
1847}
1848
1849
1850void DXShaderFree(DXShaderInfo *pInfo)
1851{
1852 RTMemFree(pInfo->pvBytecode);
1853 RT_ZERO(*pInfo);
1854}
1855
1856
#if 0 // Unused. Replaced with dxbcSemanticInfo.
/**
 * Translate a signature semantic to a name string.
 *
 * @returns The name for the semantic; "ATTRIB" for anything which is not listed.
 * @param   enmSemanticName     The semantic.
 */
static char const *dxbcSemanticName(SVGA3dDXSignatureSemanticName enmSemanticName)
{
    /* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics */
    char const *pszName;
    switch (enmSemanticName)
    {
        case SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION:                          pszName = "SV_Position"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE:                     pszName = "SV_ClipDistance"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE:                     pszName = "SV_CullDistance"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX:         pszName = "SV_RenderTargetArrayIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX:              pszName = "SV_ViewportArrayIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID:                         pszName = "SV_VertexID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID:                      pszName = "SV_PrimitiveID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID:                       pszName = "SV_InstanceID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE:                     pszName = "SV_IsFrontFace"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX:                      pszName = "SV_SampleIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: pszName = "SV_FinalQuadUeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: pszName = "SV_FinalQuadVeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: pszName = "SV_FinalQuadUeq1EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: pszName = "SV_FinalQuadVeq1EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:    pszName = "SV_FinalQuadUInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:    pszName = "SV_FinalQuadVInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriUeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriVeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriWeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR:       pszName = "SV_FinalTriInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR:      pszName = "SV_FinalLineDetailTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR:     pszName = "SV_FinalLineDensityTessFactor"; break;
        default:
            Assert(enmSemanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
            /* Generic. Arbitrary name. It does not have any meaning. */
            pszName = "ATTRIB";
            break;
    }
    return pszName;
}
#endif
1893
1894
/**
 * Description of a signature semantic: its name and the type of its components.
 *
 * See https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics
 */
typedef struct VGPUSemanticInfo
{
    /** The semantic name string. */
    char const *pszName;
    /** The component type:
     *    0 - undefined
     *    1 - unsigned int
     *    2 - signed int
     *    3 - float
     */
    uint32_t u32Type;
} VGPUSemanticInfo;
1907
1908static VGPUSemanticInfo const g_aSemanticInfo[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX] =
1909{
1910 { "ATTRIB", 0 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
1911 { "SV_Position", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1
1912 { "SV_ClipDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2
1913 { "SV_CullDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3
1914 { "SV_RenderTargetArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4
1915 { "SV_ViewportArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5
1916 { "SV_VertexID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6
1917 { "SV_PrimitiveID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7
1918 { "SV_InstanceID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8
1919 { "SV_IsFrontFace", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9
1920 { "SV_SampleIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10
1921 /** @todo Is this a correct name for all TessFactors? */
1922 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11
1923 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12
1924 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13
1925 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14
1926 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15
1927 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16
1928 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17
1929 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18
1930 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19
1931 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20
1932 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21
1933 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22
1934};
1935
1936static VGPUSemanticInfo const g_SemanticPSOutput =
1937 { "SV_TARGET", 3 }; // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
1938
1939
1940static VGPUSemanticInfo const *dxbcSemanticInfo(DXShaderInfo const *pInfo, SVGA3dDXSignatureSemanticName enmSemanticName, uint32_t u32BlobType)
1941{
1942 if (enmSemanticName < RT_ELEMENTS(g_aSemanticInfo))
1943 {
1944 if ( enmSemanticName == 0
1945 && pInfo->enmProgramType == VGPU10_PIXEL_SHADER
1946 && u32BlobType == DXBC_BLOB_TYPE_OSGN)
1947 return &g_SemanticPSOutput;
1948 return &g_aSemanticInfo[enmSemanticName];
1949 }
1950 return &g_aSemanticInfo[0];
1951}
1952
1953
1954static int dxbcCreateIOSGNBlob(DXShaderInfo const *pInfo, DXBCHeader *pHdr, uint32_t u32BlobType,
1955 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature, DXBCByteWriter *w)
1956{
1957 /* aIdxSignature contains signature indices. aIdxSignature[0] = signature index for register 0. */
1958 uint32_t aIdxSignature[32];
1959 memset(aIdxSignature, 0xFF, sizeof(aIdxSignature));
1960 AssertReturn(cSignature <= RT_ELEMENTS(aIdxSignature), VERR_INTERNAL_ERROR);
1961 for (uint32_t i = 0; i < cSignature; ++i)
1962 {
1963 SVGA3dDXSignatureEntry const *src = &paSignature[i];
1964 if (src->registerIndex == 0xFFFFFFFF)
1965 {
1966 /* oDepth for PS output. */
1967 ASSERT_GUEST_RETURN(pInfo->enmProgramType == VGPU10_PIXEL_SHADER, VERR_INVALID_PARAMETER);
1968
1969 /* Must be placed last in the signature. */
1970 ASSERT_GUEST_RETURN(aIdxSignature[cSignature - 1] == 0xFFFFFFFF, VERR_INVALID_PARAMETER);
1971 aIdxSignature[cSignature - 1] = i;
1972 continue;
1973 }
1974
1975 ASSERT_GUEST_RETURN(src->registerIndex < RT_ELEMENTS(aIdxSignature), VERR_INVALID_PARAMETER);
1976 ASSERT_GUEST_RETURN(aIdxSignature[src->registerIndex] == 0xFFFFFFFF, VERR_INVALID_PARAMETER);
1977 aIdxSignature[src->registerIndex] = i;
1978 }
1979
1980 uint32_t cbBlob = RT_UOFFSETOF(DXBCBlobIOSGN, aElement[cSignature])
1981 + cSignature * RT_SIZEOFMEMB(DXBCBlobIOSGN, aElement[0]);
1982 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
1983 return VERR_NO_MEMORY;
1984
1985 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
1986 pHdrBlob->u32BlobType = u32BlobType;
1987 // pHdrBlob->cbBlob = 0;
1988
1989 DXBCBlobIOSGN *pHdrISGN = (DXBCBlobIOSGN *)&pHdrBlob[1];
1990 pHdrISGN->cElement = cSignature;
1991 pHdrISGN->offElement = RT_UOFFSETOF(DXBCBlobIOSGN, aElement[0]);
1992
1993 uint32_t aSemanticIdx[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX];
1994 RT_ZERO(aSemanticIdx);
1995 uint32_t iSignature = 0;
1996 for (uint32_t iReg = 0; iReg < RT_ELEMENTS(aIdxSignature); ++iReg)
1997 {
1998 if (aIdxSignature[iReg] == 0xFFFFFFFF) /* This register is unused. */
1999 continue;
2000
2001 AssertReturn(iSignature < cSignature, VERR_INTERNAL_ERROR);
2002
2003 SVGA3dDXSignatureEntry const *src = &paSignature[aIdxSignature[iReg]];
2004 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iSignature];
2005
2006 ASSERT_GUEST_RETURN(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, VERR_INVALID_PARAMETER);
2007 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2008
2009 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without hdr). */
2010 /* Use the register index as the semantic index for generic attributes in order to
2011 * produce compatible semantic names between shaders.
2012 */
2013 dst->idxSemantic = src->semanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
2014 ? src->registerIndex
2015 : aSemanticIdx[src->semanticName]++;
2016 dst->enmSystemValue = src->semanticName;
2017 dst->enmComponentType = src->componentType ? src->componentType : pSemanticInfo->u32Type;
2018 dst->idxRegister = src->registerIndex;
2019 dst->mask = src->mask;
2020 if (u32BlobType == DXBC_BLOB_TYPE_OSGN)
2021 dst->mask2 = 0;
2022 else
2023 dst->mask2 = src->mask;
2024
2025 /* Figure out the semantic name for this element. */
2026 char const * const pszElementName = pSemanticInfo->pszName;
2027 uint32_t const cbElementName = (uint32_t)strlen(pszElementName) + 1;
2028
2029 if (!dxbcByteWriterCanWrite(w, cbBlob + cbElementName))
2030 return VERR_NO_MEMORY;
2031
2032 char *pszElementNameDst = (char *)pHdrISGN + dst->offElementName;
2033 memcpy(pszElementNameDst, pszElementName, cbElementName);
2034
2035 cbBlob += cbElementName;
2036 ++iSignature;
2037 }
2038
2039 /* Blobs are 4 bytes aligned. Commit the blob data. */
2040 cbBlob = RT_ALIGN_32(cbBlob, 4);
2041 pHdrBlob->cbBlob = cbBlob;
2042 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2043 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2044 return VINF_SUCCESS;
2045}
2046
2047
2048static int dxbcCreateSHDRBlob(DXBCHeader *pHdr, uint32_t u32BlobType,
2049 void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2050{
2051 uint32_t cbBlob = cbShader;
2052 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2053 return VERR_NO_MEMORY;
2054
2055 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2056 pHdrBlob->u32BlobType = u32BlobType;
2057 // pHdrBlob->cbBlob = 0;
2058
2059 memcpy(&pHdrBlob[1], pvShader, cbShader);
2060
2061 /* Blobs are 4 bytes aligned. Commit the blob data. */
2062 cbBlob = RT_ALIGN_32(cbBlob, 4);
2063 pHdrBlob->cbBlob = cbBlob;
2064 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2065 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2066 return VINF_SUCCESS;
2067}
2068
2069
2070/*
2071 * Create a DXBC container with signature and shader code data blobs.
2072 */
2073static int dxbcCreateFromInfo(DXShaderInfo const *pInfo, void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2074{
2075 int rc;
2076
2077 /* Create a DXBC container with ISGN, OSGN and SHDR blobs. */
2078 uint32_t const cBlob = 3;
2079 uint32_t const cbHdr = RT_UOFFSETOF(DXBCHeader, aBlobOffset[cBlob]); /* Header with blob offsets. */
2080 if (!dxbcByteWriterCanWrite(w, cbHdr))
2081 return VERR_NO_MEMORY;
2082
2083 /* Container header. */
2084 DXBCHeader *pHdr = (DXBCHeader *)dxbcByteWriterPtr(w);
2085 pHdr->u32DXBC = DXBC_MAGIC;
2086 // RT_ZERO(pHdr->au8Hash);
2087 pHdr->u32Version = 1;
2088 pHdr->cbTotal = cbHdr;
2089 pHdr->cBlob = cBlob;
2090 //RT_ZERO(pHdr->aBlobOffset);
2091 dxbcByteWriterCommit(w, cbHdr);
2092
2093 /* Blobs. */
2094 uint32_t iBlob = 0;
2095
2096 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2097 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_ISGN, pInfo->cInputSignature, &pInfo->aInputSignature[0], w);
2098 AssertRCReturn(rc, rc);
2099
2100 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2101 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], w);
2102 AssertRCReturn(rc, rc);
2103
2104 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2105 rc = dxbcCreateSHDRBlob(pHdr, DXBC_BLOB_TYPE_SHDR, pvShader, cbShader, w);
2106 AssertRCReturn(rc, rc);
2107
2108 AssertCompile(RT_UOFFSETOF(DXBCHeader, u32Version) == 0x14);
2109 dxbcHash(&pHdr->u32Version, pHdr->cbTotal - RT_UOFFSETOF(DXBCHeader, u32Version), pHdr->au8Hash);
2110
2111 return VINF_SUCCESS;
2112}
2113
2114
2115int DXShaderCreateDXBC(DXShaderInfo const *pInfo, void **ppvDXBC, uint32_t *pcbDXBC)
2116{
2117 /* Build DXBC container. */
2118 int rc;
2119 DXBCByteWriter dxbcByteWriter;
2120 DXBCByteWriter *w = &dxbcByteWriter;
2121 if (dxbcByteWriterInit(w, 4096 + pInfo->cbBytecode))
2122 {
2123 rc = dxbcCreateFromInfo(pInfo, pInfo->pvBytecode, pInfo->cbBytecode, w);
2124 if (RT_SUCCESS(rc))
2125 dxbcByteWriterFetchData(w, ppvDXBC, pcbDXBC);
2126 }
2127 else
2128 rc = VERR_NO_MEMORY;
2129 return rc;
2130}
2131
2132
2133static char const *dxbcGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, uint32_t u32BlobType,
2134 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature)
2135{
2136 for (uint32_t i = 0; i < cSignature; ++i)
2137 {
2138 SVGA3dDXSignatureEntry const *p = &paSignature[i];
2139 if (p->registerIndex == idxRegister)
2140 {
2141 AssertReturn(p->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, NULL);
2142 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, p->semanticName, u32BlobType);
2143 return pSemanticInfo->pszName;
2144 }
2145 }
2146 return NULL;
2147}
2148
2149char const *DXShaderGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister)
2150{
2151 return dxbcGetOutputSemanticName(pInfo, idxRegister, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0]);
2152}
2153
2154
2155#ifdef DXBC_STANDALONE_TEST
2156static int dxbcCreateFromBytecode(void const *pvShaderCode, uint32_t cbShaderCode, void **ppvDXBC, uint32_t *pcbDXBC)
2157{
2158 /* Parse the shader bytecode and create DXBC container with resource, signature and shader bytecode blobs. */
2159 DXShaderInfo info;
2160 RT_ZERO(info);
2161 int rc = DXShaderParse(pvShaderCode, cbShaderCode, &info);
2162 if (RT_SUCCESS(rc))
2163 rc = DXShaderCreateDXBC(&info, info.pvBytecode, info.cbBytecode, ppvDXBC, pcbDXBC);
2164 return rc;
2165}
2166
2167static int parseShaderVM(void const *pvShaderCode, uint32_t cbShaderCode)
2168{
2169 void *pv = NULL;
2170 uint32_t cb = 0;
2171 int rc = dxbcCreateFromBytecode(pvShaderCode, cbShaderCode, &pv, &cb);
2172 if (RT_SUCCESS(rc))
2173 {
2174 /* Hexdump DXBC */
2175 printf("{\n");
2176 uint8_t *pu8 = (uint8_t *)pv;
2177 for (uint32_t i = 0; i < cb; ++i)
2178 {
2179 if ((i % 16) == 0)
2180 {
2181 if (i > 0)
2182 printf(",\n");
2183
2184 printf(" 0x%02x", pu8[i]);
2185 }
2186 else
2187 {
2188 printf(", 0x%02x", pu8[i]);
2189 }
2190 }
2191 printf("\n");
2192 printf("};\n");
2193
2194 RTMemFree(pv);
2195 }
2196
2197 return rc;
2198}
2199
2200static DXBCBlobHeader *dxbcFindBlob(DXBCHeader *pDXBCHeader, uint32_t u32BlobType)
2201{
2202 uint8_t const *pu8DXBCBegin = (uint8_t *)pDXBCHeader;
2203 for (uint32_t i = 0; i < pDXBCHeader->cBlob; ++i)
2204 {
2205 DXBCBlobHeader *pCurrentBlob = (DXBCBlobHeader *)&pu8DXBCBegin[pDXBCHeader->aBlobOffset[i]];
2206 if (pCurrentBlob->u32BlobType == u32BlobType)
2207 return pCurrentBlob;
2208 }
2209 return NULL;
2210}
2211
2212static int dxbcExtractShaderCode(DXBCHeader *pDXBCHeader, void **ppvCode, uint32_t *pcbCode)
2213{
2214 DXBCBlobHeader *pBlob = dxbcFindBlob(pDXBCHeader, DXBC_BLOB_TYPE_SHDR);
2215 AssertReturn(pBlob, VERR_NOT_IMPLEMENTED);
2216
2217 DXBCBlobSHDR *pSHDR = (DXBCBlobSHDR *)&pBlob[1];
2218 *pcbCode = pSHDR->cToken * 4;
2219 *ppvCode = RTMemAlloc(*pcbCode);
2220 AssertReturn(*ppvCode, VERR_NO_MEMORY);
2221
2222 memcpy(*ppvCode, pSHDR, *pcbCode);
2223 return VINF_SUCCESS;
2224}
2225
2226static int parseShaderDXBC(void const *pvDXBC)
2227{
2228 DXBCHeader *pDXBCHeader = (DXBCHeader *)pvDXBC;
2229 void *pvShaderCode = NULL;
2230 uint32_t cbShaderCode = 0;
2231 int rc = dxbcExtractShaderCode(pDXBCHeader, &pvShaderCode, &cbShaderCode);
2232 if (RT_SUCCESS(rc))
2233 {
2234 rc = parseShaderVM(pvShaderCode, cbShaderCode);
2235 RTMemFree(pvShaderCode);
2236 }
2237 return rc;
2238}
2239#endif /* DXBC_STANDALONE_TEST */
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette