VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/shaderlib/glsl_shader.c@ 53222

最後變更 在這個檔案從53222是 53222,由 vboxsync 提交於 10 年 前

Devices/vmsvga: enabled 3D and a few warning fixes

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 212.4 KB
 
1/*
2 * GLSL pixel and vertex shader implementation
3 *
4 * Copyright 2006 Jason Green
5 * Copyright 2006-2007 Henri Verbeet
6 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
7 * Copyright 2009 Henri Verbeet for CodeWeavers
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 */
23
24/*
25 * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
26 * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
27 * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
28 * a choice of LGPL license versions is made available with the language indicating
29 * that LGPLv2 or any later version may be used, or where a choice of which version
30 * of the LGPL is applied is otherwise unspecified.
31 */
32
/*
 * D3D shader asm has swizzles on source parameters, and write masks for
 * destination parameters. GLSL uses swizzles for both. As a result, for
 * example, "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
 * I.e., to generate a proper GLSL source swizzle, we need to take the D3D
 * write mask for the destination parameter into account.
 */
40
41#include "config.h"
42#include "wine/port.h"
43#include <limits.h>
44#include <stdio.h>
45#include "wined3d_private.h"
46
47WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
48WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
49WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
50WINE_DECLARE_DEBUG_CHANNEL(d3d);
51
52#ifdef VBOX_WITH_VMSVGA
53#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
54#include <VBox/log.h>
55#undef WDLOG
56#define WDLOG(_m) Log(_m)
57#undef CONST
58#define CONST const
59#endif
60
61#define GLINFO_LOCATION (*gl_info)
62
/* Single-bit flags describing a texture sample operation. Their consumers are
 * outside this part of the file; presumably they are OR-ed together when the
 * GLSL sampling function is chosen. */
#define WINED3D_GLSL_SAMPLE_PROJECTED   0x1
#define WINED3D_GLSL_SAMPLE_RECT        0x2
#define WINED3D_GLSL_SAMPLE_LOD         0x4
#define WINED3D_GLSL_SAMPLE_GRAD        0x8
67
/* Textual form of a destination operand. */
typedef struct
{
    char reg_name[150];     /* register name text */
    char mask_str[6];       /* write mask text (presumably "." plus up to four components and a NUL) */
} glsl_dst_param_t;
72
/* Textual form of a source operand. */
typedef struct
{
    char reg_name[150];     /* register name text */
    char param_str[200];    /* full parameter expression text (presumably name plus swizzle/modifiers) */
} glsl_src_param_t;
77
78typedef struct {
79 const char *name;
80 DWORD coord_mask;
81} glsl_sample_function_t;
82
/* Pending action for a node on the explicit stack used by the constant heap
 * walkers (walk_constant_heap() and walk_constant_heap_clamped()). */
enum heap_node_op
{
    HEAP_NODE_TRAVERSE_LEFT,    /* try to descend into the left child next */
    HEAP_NODE_TRAVERSE_RIGHT,   /* try to descend into the right child next */
    HEAP_NODE_POP,              /* both children handled, return to the parent */
};
89
/* One node of a constant heap: a constant index and the version stamp of its
 * most recent update. */
struct constant_entry
{
    unsigned int idx;       /* index of the float constant */
    unsigned int version;   /* version assigned when the constant was last updated */
};
95
/* Binary max-heap of constants ordered by version. The root lives at index 1
 * and the children of node n are 2n and 2n + 1. */
struct constant_heap
{
    struct constant_entry *entries;     /* heap nodes, addressed from index 1 */
    unsigned int *positions;            /* constant index -> current index in entries */
    unsigned int size;                  /* first unused index in entries */
};
102
103/* GLSL shader private data */
104struct shader_glsl_priv {
105 struct wined3d_shader_buffer shader_buffer;
106 struct wine_rb_tree program_lookup;
107 struct glsl_shader_prog_link *glsl_program;
108 struct constant_heap vconst_heap;
109 struct constant_heap pconst_heap;
110 unsigned char *stack;
111 GLhandleARB depth_blt_program[tex_type_count];
112 UINT next_constant_version;
113};
114
115/* Struct to maintain data about a linked GLSL program */
116struct glsl_shader_prog_link {
117 struct wine_rb_entry program_lookup_entry;
118 struct list vshader_entry;
119 struct list pshader_entry;
120 GLhandleARB programId;
121 GLint *vuniformF_locations;
122 GLint *puniformF_locations;
123 GLint vuniformI_locations[MAX_CONST_I];
124 GLint puniformI_locations[MAX_CONST_I];
125 GLint posFixup_location;
126 GLint np2Fixup_location;
127 GLint bumpenvmat_location[MAX_TEXTURES];
128 GLint luminancescale_location[MAX_TEXTURES];
129 GLint luminanceoffset_location[MAX_TEXTURES];
130 GLint ycorrection_location;
131 GLenum vertex_color_clamp;
132 IWineD3DVertexShader *vshader;
133 IWineD3DPixelShader *pshader;
134 struct vs_compile_args vs_args;
135 struct ps_compile_args ps_args;
136 UINT constant_version;
137 const struct wined3d_context *context;
138 UINT inp2Fixup_info;
139};
140
/*
 * Accessors for glsl_shader_prog_link::inp2Fixup_info, which holds an index
 * into the pixel shader's compiled-shader array or the "no index" sentinel.
 *
 * The sentinel is an unsigned int value because the field is a UINT. With
 * (~0UL) the comparisons below could never match on platforms where long is
 * wider than int, so an unset index was always reported as valid.
 *
 * WINEFIXUPINFO_INIT stores the sentinel. It used to be a comparison (==),
 * which had no effect when used as a statement.
 */
#define WINEFIXUPINFO_NOINDEX (~0U)
#define WINEFIXUPINFO_GET(_p) get_fixup_info((const IWineD3DPixelShaderImpl*)(_p)->pshader, (_p)->inp2Fixup_info)
#define WINEFIXUPINFO_ISVALID(_p) ((_p)->inp2Fixup_info != WINEFIXUPINFO_NOINDEX)
#define WINEFIXUPINFO_INIT(_p) ((_p)->inp2Fixup_info = WINEFIXUPINFO_NOINDEX)
145
146typedef struct {
147 IWineD3DVertexShader *vshader;
148 IWineD3DPixelShader *pshader;
149 struct ps_compile_args ps_args;
150 struct vs_compile_args vs_args;
151 const struct wined3d_context *context;
152} glsl_program_key_t;
153
/* Per-compilation state: the arguments of the shader variant being generated. */
struct shader_glsl_ctx_priv
{
    const struct vs_compile_args *cur_vs_args;      /* vertex shader compile arguments */
    const struct ps_compile_args *cur_ps_args;      /* pixel shader compile arguments */
    struct ps_np2fixup_info *cur_np2fixup_info;     /* NP2 fixup description for the pixel shader */
};
159
160struct glsl_ps_compiled_shader
161{
162 struct ps_compile_args args;
163 struct ps_np2fixup_info np2fixup;
164 GLhandleARB prgId;
165 const struct wined3d_context *context;
166};
167
168struct glsl_pshader_private
169{
170 struct glsl_ps_compiled_shader *gl_shaders;
171 UINT num_gl_shaders, shader_array_size;
172};
173
174struct glsl_vs_compiled_shader
175{
176 struct vs_compile_args args;
177 GLhandleARB prgId;
178 const struct wined3d_context *context;
179};
180
181struct glsl_vshader_private
182{
183 struct glsl_vs_compiled_shader *gl_shaders;
184 UINT num_gl_shaders, shader_array_size;
185};
186
187static const char *debug_gl_shader_type(GLenum type)
188{
189 switch (type)
190 {
191#define WINED3D_TO_STR(u) case u: return #u
192 WINED3D_TO_STR(GL_VERTEX_SHADER_ARB);
193 WINED3D_TO_STR(GL_GEOMETRY_SHADER_ARB);
194 WINED3D_TO_STR(GL_FRAGMENT_SHADER_ARB);
195#undef WINED3D_TO_STR
196 default:
197 return wine_dbg_sprintf("UNKNOWN(%#x)", type);
198 }
199}
200
/* Extracts the next line from an info log buffer.
 *
 * ptr    - in: start of the unread text; out: advanced past the returned line.
 * pcbStr - in: number of bytes left at *ptr (terminator included);
 *          out: number of bytes left after the returned line.
 *
 * Returns the line as a NUL-terminated string, or NULL when nothing is left.
 * The buffer is modified: the newline that ends the line is overwritten with
 * '\0', and a missing final terminator is forced in. */
static char *get_info_log_line(char **ptr, int *pcbStr)
{
    const int cbStr = *pcbStr;
    char *line, *eol;

    /* Zero-length buffer: nothing to return. */
    if (!cbStr)
        return NULL;

    if ((*ptr)[cbStr - 1] != '\0')
    {
        ERR("string should be null-rerminated, forcing it!");
        (*ptr)[cbStr - 1] = '\0';
    }

    line = *ptr;
    if (*line == '\0')
    {
        *pcbStr = 0;
        return NULL;
    }

    eol = strchr(line, '\n');
    if (!eol)
    {
        /* Last line: consume everything up to the terminator. */
        *ptr = line + strlen(line);
        *pcbStr = 0;
        return line;
    }

    *eol = '\0';
    /* Bytes consumed: the line itself plus the newline we just replaced. */
    *pcbStr = cbStr - (int)(eol - line) - 1;
    Assert((*pcbStr) >= 0);
    Assert((*pcbStr) < cbStr);
    *ptr = eol + 1;

    return line;
}
242
243/** Prints the GLSL info log which will contain error messages if they exist */
244/* GL locking is done by the caller */
245static void print_glsl_info_log(const struct wined3d_gl_info *gl_info, GLhandleARB obj)
246{
247 int infologLength = 0;
248 char *infoLog;
249 unsigned int i;
250 BOOL is_spam;
251
252 static const char * const spam[] =
253 {
254 "Vertex shader was successfully compiled to run on hardware.\n", /* fglrx */
255 "Fragment shader was successfully compiled to run on hardware.\n", /* fglrx, with \n */
256 "Fragment shader was successfully compiled to run on hardware.", /* fglrx, no \n */
257 "Fragment shader(s) linked, vertex shader(s) linked. \n ", /* fglrx, with \n */
258 "Fragment shader(s) linked, vertex shader(s) linked.", /* fglrx, no \n */
259 "Vertex shader(s) linked, no fragment shader(s) defined. \n ", /* fglrx, with \n */
260 "Vertex shader(s) linked, no fragment shader(s) defined.", /* fglrx, no \n */
261 "Fragment shader(s) linked, no vertex shader(s) defined. \n ", /* fglrx, with \n */
262 "Fragment shader(s) linked, no vertex shader(s) defined.", /* fglrx, no \n */
263 };
264
265#ifndef VBOXWINEDBG_SHADERS
266 if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
267#endif
268
269 GL_EXTCALL(glGetObjectParameterivARB(obj,
270 GL_OBJECT_INFO_LOG_LENGTH_ARB,
271 &infologLength));
272
273 /* A size of 1 is just a null-terminated string, so the log should be bigger than
274 * that if there are errors. */
275 if (infologLength > 1)
276 {
277 char *ptr, *line;
278 int cbPtr;
279
280 /* Fglrx doesn't terminate the string properly, but it tells us the proper length.
281 * So use HEAP_ZERO_MEMORY to avoid uninitialized bytes
282 */
283 infoLog = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, infologLength);
284 GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
285 is_spam = FALSE;
286
287 for(i = 0; i < sizeof(spam) / sizeof(spam[0]); i++) {
288 if(strcmp(infoLog, spam[i]) == 0) {
289 is_spam = TRUE;
290 break;
291 }
292 }
293
294 ptr = infoLog;
295 cbPtr = infologLength;
296 if (is_spam)
297 {
298 WDLOG(("Spam received from GLSL shader #%u:\n", obj));
299 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
300 }
301 else
302 {
303 WDLOG(("Error received from GLSL shader #%u:\n", obj));
304 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
305 }
306 HeapFree(GetProcessHeap(), 0, infoLog);
307 }
308}
309
310static void shader_glsl_dump_shader_source(const struct wined3d_gl_info *gl_info, GLhandleARB shader)
311{
312 char *ptr;
313 GLint tmp, source_size;
314 char *source = NULL;
315 int cbPtr;
316
317 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_SHADER_SOURCE_LENGTH_ARB, &tmp));
318
319 source = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, tmp);
320 if (!source)
321 {
322 ERR("Failed to allocate %d bytes for shader source.\n", tmp);
323 return;
324 }
325
326 source_size = tmp;
327
328 WDLOG(("Object %u:\n", shader));
329 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_SUBTYPE_ARB, &tmp));
330 WDLOG((" GL_OBJECT_SUBTYPE_ARB: %s.\n", debug_gl_shader_type(tmp)));
331 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_COMPILE_STATUS_ARB, &tmp));
332 WDLOG((" GL_OBJECT_COMPILE_STATUS_ARB: %d.\n", tmp));
333 WDLOG(("\n"));
334
335 ptr = source;
336 cbPtr = source_size;
337 GL_EXTCALL(glGetShaderSourceARB(shader, source_size, NULL, source));
338#if 0
339 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
340#else
341 WDLOG(("*****shader source***\n"));
342 WDLOG((" %s\n", source));
343 WDLOG(("\n*****END shader source***\n\n"));
344#endif
345 WDLOG(("\n"));
346}
347
348/* GL locking is done by the caller. */
349static void shader_glsl_dump_program_source(const struct wined3d_gl_info *gl_info, GLhandleARB program)
350{
351 GLint i, object_count;
352 GLhandleARB *objects;
353 char *source = NULL;
354
355 WDLOG(("\n***************************dumping program %d******************************\n", program));
356
357 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_ATTACHED_OBJECTS_ARB, &object_count));
358 objects = HeapAlloc(GetProcessHeap(), 0, object_count * sizeof(*objects));
359 if (!objects)
360 {
361 ERR("Failed to allocate object array memory.\n");
362 return;
363 }
364
365 GL_EXTCALL(glGetAttachedObjectsARB(program, object_count, NULL, objects));
366 for (i = 0; i < object_count; ++i)
367 {
368 shader_glsl_dump_shader_source(gl_info, objects[i]);
369 }
370
371 HeapFree(GetProcessHeap(), 0, source);
372 HeapFree(GetProcessHeap(), 0, objects);
373
374 WDLOG(("\n***************************END dumping program %d******************************\n\n", program));
375}
376
377/* GL locking is done by the caller. */
378static void shader_glsl_validate_compile_link(const struct wined3d_gl_info *gl_info, GLhandleARB program, GLboolean fIsProgram)
379{
380 GLint tmp = -1;
381
382#ifndef VBOXWINEDBG_SHADERS
383 if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
384#endif
385
386 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_TYPE_ARB, &tmp));
387 if (tmp == GL_PROGRAM_OBJECT_ARB)
388 {
389 if (!fIsProgram)
390 {
391 ERR("this is a program, but shader expected");
392 }
393 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &tmp));
394 if (!tmp)
395 {
396 ERR("Program %u link status invalid.\n", program);
397#ifndef VBOXWINEDBG_SHADERS
398 shader_glsl_dump_program_source(gl_info, program);
399#endif
400 }
401#if defined(VBOX_WITH_VMSVGA) && defined(DEBUG)
402 shader_glsl_dump_program_source(gl_info, program);
403#endif
404 }
405 else if (tmp == GL_SHADER_OBJECT_ARB)
406 {
407 if (fIsProgram)
408 {
409 ERR("this is a shader, but program expected");
410 }
411
412 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_COMPILE_STATUS_ARB, &tmp));
413 if (!tmp)
414 {
415 ERR("Shader %u compile status invalid.\n", program);
416 shader_glsl_dump_shader_source(gl_info, program);
417 }
418 }
419 else
420 {
421 ERR("unexpected oject type(%d)!", tmp);
422 }
423
424 print_glsl_info_log(gl_info, program);
425}
426
427/**
428 * Loads (pixel shader) samplers
429 */
430/* GL locking is done by the caller */
431static void shader_glsl_load_psamplers(const struct wined3d_gl_info *gl_info,
432 DWORD *tex_unit_map, GLhandleARB programId)
433{
434 GLint name_loc;
435 int i;
436 char sampler_name[20];
437
438 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
439 snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i);
440 name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
441 if (name_loc != -1) {
442 DWORD mapped_unit = tex_unit_map[i];
443 if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.fragment_samplers)
444 {
445 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
446 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
447 checkGLcall("glUniform1iARB");
448 } else {
449 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
450 }
451 }
452 }
453}
454
455/* GL locking is done by the caller */
456static void shader_glsl_load_vsamplers(const struct wined3d_gl_info *gl_info,
457 DWORD *tex_unit_map, GLhandleARB programId)
458{
459 GLint name_loc;
460 char sampler_name[20];
461 int i;
462
463 for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i) {
464 snprintf(sampler_name, sizeof(sampler_name), "Vsampler%d", i);
465 name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
466 if (name_loc != -1) {
467 DWORD mapped_unit = tex_unit_map[MAX_FRAGMENT_SAMPLERS + i];
468 if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.combined_samplers)
469 {
470 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
471 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
472 checkGLcall("glUniform1iARB");
473 } else {
474 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
475 }
476 }
477 }
478}
479
480/* GL locking is done by the caller */
481static inline void walk_constant_heap(const struct wined3d_gl_info *gl_info, const float *constants,
482 const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
483{
484 int stack_idx = 0;
485 unsigned int heap_idx = 1;
486 unsigned int idx;
487
488 if (heap->entries[heap_idx].version <= version) return;
489
490 idx = heap->entries[heap_idx].idx;
491 if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
492 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
493
494 while (stack_idx >= 0)
495 {
496 /* Note that we fall through to the next case statement. */
497 switch(stack[stack_idx])
498 {
499 case HEAP_NODE_TRAVERSE_LEFT:
500 {
501 unsigned int left_idx = heap_idx << 1;
502 if (left_idx < heap->size && heap->entries[left_idx].version > version)
503 {
504 heap_idx = left_idx;
505 idx = heap->entries[heap_idx].idx;
506 if (constant_locations[idx] != -1)
507 GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
508
509 stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
510 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
511 break;
512 }
513 }
514
515 case HEAP_NODE_TRAVERSE_RIGHT:
516 {
517 unsigned int right_idx = (heap_idx << 1) + 1;
518 if (right_idx < heap->size && heap->entries[right_idx].version > version)
519 {
520 heap_idx = right_idx;
521 idx = heap->entries[heap_idx].idx;
522 if (constant_locations[idx] != -1)
523 GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
524
525 stack[stack_idx++] = HEAP_NODE_POP;
526 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
527 break;
528 }
529 }
530
531 case HEAP_NODE_POP:
532 {
533 heap_idx >>= 1;
534 --stack_idx;
535 break;
536 }
537 }
538 }
539 checkGLcall("walk_constant_heap()");
540}
541
542/* GL locking is done by the caller */
543static inline void apply_clamped_constant(const struct wined3d_gl_info *gl_info, GLint location, const GLfloat *data)
544{
545 GLfloat clamped_constant[4];
546
547 if (location == -1) return;
548
549 clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0f ? 1.0f : data[0];
550 clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0f ? 1.0f : data[1];
551 clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0f ? 1.0f : data[2];
552 clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0f ? 1.0f : data[3];
553
554 GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant));
555}
556
557/* GL locking is done by the caller */
558static inline void walk_constant_heap_clamped(const struct wined3d_gl_info *gl_info, const float *constants,
559 const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
560{
561 int stack_idx = 0;
562 unsigned int heap_idx = 1;
563 unsigned int idx;
564
565 if (heap->entries[heap_idx].version <= version) return;
566
567 idx = heap->entries[heap_idx].idx;
568 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
569 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
570
571 while (stack_idx >= 0)
572 {
573 /* Note that we fall through to the next case statement. */
574 switch(stack[stack_idx])
575 {
576 case HEAP_NODE_TRAVERSE_LEFT:
577 {
578 unsigned int left_idx = heap_idx << 1;
579 if (left_idx < heap->size && heap->entries[left_idx].version > version)
580 {
581 heap_idx = left_idx;
582 idx = heap->entries[heap_idx].idx;
583 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
584
585 stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
586 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
587 break;
588 }
589 }
590
591 case HEAP_NODE_TRAVERSE_RIGHT:
592 {
593 unsigned int right_idx = (heap_idx << 1) + 1;
594 if (right_idx < heap->size && heap->entries[right_idx].version > version)
595 {
596 heap_idx = right_idx;
597 idx = heap->entries[heap_idx].idx;
598 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
599
600 stack[stack_idx++] = HEAP_NODE_POP;
601 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
602 break;
603 }
604 }
605
606 case HEAP_NODE_POP:
607 {
608 heap_idx >>= 1;
609 --stack_idx;
610 break;
611 }
612 }
613 }
614 checkGLcall("walk_constant_heap_clamped()");
615}
616
617/* Loads floating point constants (aka uniforms) into the currently set GLSL program. */
618/* GL locking is done by the caller */
619static void shader_glsl_load_constantsF(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
620 const float *constants, const GLint *constant_locations, const struct constant_heap *heap,
621 unsigned char *stack, UINT version)
622{
623 const local_constant *lconst;
624
625 /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
626 if (This->baseShader.reg_maps.shader_version.major == 1
627 && shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type))
628 walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
629 else
630 walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
631
632 if (!This->baseShader.load_local_constsF)
633 {
634 TRACE("No need to load local float constants for this shader\n");
635 return;
636 }
637
638 /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
639 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry)
640 {
641 GLint location = constant_locations[lconst->idx];
642 /* We found this uniform name in the program - go ahead and send the data */
643 if (location != -1) GL_EXTCALL(glUniform4fvARB(location, 1, (const GLfloat *)lconst->value));
644 }
645 checkGLcall("glUniform4fvARB()");
646}
647
648/* Loads integer constants (aka uniforms) into the currently set GLSL program. */
649/* GL locking is done by the caller */
650static void shader_glsl_load_constantsI(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
651 const GLint locations[MAX_CONST_I], const int *constants, WORD constants_set)
652{
653 unsigned int i;
654 struct list* ptr;
655
656 for (i = 0; constants_set; constants_set >>= 1, ++i)
657 {
658 if (!(constants_set & 1)) continue;
659
660 TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n",
661 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);
662
663 /* We found this uniform name in the program - go ahead and send the data */
664 GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4]));
665 checkGLcall("glUniform4ivARB");
666 }
667
668 /* Load immediate constants */
669 ptr = list_head(&This->baseShader.constantsI);
670 while (ptr) {
671 const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
672 unsigned int idx = lconst->idx;
673 const GLint *values = (const GLint *)lconst->value;
674
675 TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
676 values[0], values[1], values[2], values[3]);
677
678 /* We found this uniform name in the program - go ahead and send the data */
679 GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values));
680 checkGLcall("glUniform4ivARB");
681 ptr = list_next(&This->baseShader.constantsI, ptr);
682 }
683}
684
685/* Loads boolean constants (aka uniforms) into the currently set GLSL program. */
686/* GL locking is done by the caller */
687static void shader_glsl_load_constantsB(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
688 GLhandleARB programId, const BOOL *constants, WORD constants_set)
689{
690 GLint tmp_loc;
691 unsigned int i;
692 char tmp_name[8];
693 const char *prefix;
694 struct list* ptr;
695
696 switch (This->baseShader.reg_maps.shader_version.type)
697 {
698 case WINED3D_SHADER_TYPE_VERTEX:
699 prefix = "VB";
700 break;
701
702 case WINED3D_SHADER_TYPE_GEOMETRY:
703 prefix = "GB";
704 break;
705
706 case WINED3D_SHADER_TYPE_PIXEL:
707 prefix = "PB";
708 break;
709
710 default:
711 FIXME("Unknown shader type %#x.\n",
712 This->baseShader.reg_maps.shader_version.type);
713 prefix = "UB";
714 break;
715 }
716
717 /* TODO: Benchmark and see if it would be beneficial to store the
718 * locations of the constants to avoid looking up each time */
719 for (i = 0; constants_set; constants_set >>= 1, ++i)
720 {
721 if (!(constants_set & 1)) continue;
722
723 TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]);
724
725 /* TODO: Benchmark and see if it would be beneficial to store the
726 * locations of the constants to avoid looking up each time */
727 snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
728 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
729 if (tmp_loc != -1)
730 {
731 /* We found this uniform name in the program - go ahead and send the data */
732 GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i]));
733 checkGLcall("glUniform1ivARB");
734 }
735 }
736
737 /* Load immediate constants */
738 ptr = list_head(&This->baseShader.constantsB);
739 while (ptr) {
740 const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
741 unsigned int idx = lconst->idx;
742 const GLint *values = (const GLint *)lconst->value;
743
744 TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
745
746 snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
747 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
748 if (tmp_loc != -1) {
749 /* We found this uniform name in the program - go ahead and send the data */
750 GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
751 checkGLcall("glUniform1ivARB");
752 }
753 ptr = list_next(&This->baseShader.constantsB, ptr);
754 }
755}
756
757static void reset_program_constant_version(struct wine_rb_entry *entry, void *context)
758{
759 WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry)->constant_version = 0;
760}
761
762static const struct ps_np2fixup_info * get_fixup_info(const IWineD3DPixelShaderImpl *shader, UINT inp2fixup_info)
763{
764 struct glsl_pshader_private *shader_data = shader->baseShader.backend_data;
765
766 if (inp2fixup_info == WINEFIXUPINFO_NOINDEX)
767 return NULL;
768
769 if (!shader->baseShader.backend_data)
770 {
771 ERR("no backend data\n");
772 return NULL;
773 }
774 shader_data = shader->baseShader.backend_data;
775
776 if (inp2fixup_info >= shader_data->num_gl_shaders)
777 {
778 ERR("invalid index\n");
779 return NULL;
780 }
781
782 return &shader_data->gl_shaders[inp2fixup_info].np2fixup;
783}
784
785/**
786 * Loads the texture dimensions for NP2 fixup into the currently set GLSL program.
787 */
788/* GL locking is done by the caller (state handler) */
789static void shader_glsl_load_np2fixup_constants(
790 IWineD3DDevice* device,
791 char usePixelShader,
792 char useVertexShader) {
793
794 const IWineD3DDeviceImpl* deviceImpl = (const IWineD3DDeviceImpl*) device;
795 const struct glsl_shader_prog_link* prog = ((struct shader_glsl_priv *)(deviceImpl->shader_priv))->glsl_program;
796
797 if (!prog) {
798 /* No GLSL program set - nothing to do. */
799 return;
800 }
801
802 if (!usePixelShader) {
803 /* NP2 texcoord fixup is (currently) only done for pixelshaders. */
804 return;
805 }
806
807 if (prog->ps_args.np2_fixup && -1 != prog->np2Fixup_location) {
808 const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info;
809 const IWineD3DStateBlockImpl* stateBlock = (const IWineD3DStateBlockImpl*) deviceImpl->stateBlock;
810 UINT i;
811 UINT fixup = prog->ps_args.np2_fixup;
812 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
813
814 const struct ps_np2fixup_info *np2Fixup_info = WINEFIXUPINFO_GET(prog);
815
816 for (i = 0; fixup; fixup >>= 1, ++i) {
817 const unsigned char idx = np2Fixup_info->idx[i];
818 const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i];
819 GLfloat* tex_dim = &np2fixup_constants[(idx >> 1) * 4];
820
821 if (!tex) {
822 FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n");
823 continue;
824 }
825
826 if (idx % 2) {
827 tex_dim[2] = tex->baseTexture.pow2Matrix[0]; tex_dim[3] = tex->baseTexture.pow2Matrix[5];
828 } else {
829 tex_dim[0] = tex->baseTexture.pow2Matrix[0]; tex_dim[1] = tex->baseTexture.pow2Matrix[5];
830 }
831 }
832
833 GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location, np2Fixup_info->num_consts, np2fixup_constants));
834 }
835}
836
837/**
838 * Loads the app-supplied constants into the currently set GLSL program.
839 */
840/* GL locking is done by the caller (state handler) */
841static void shader_glsl_load_constants(const struct wined3d_context *context,
842 char usePixelShader, char useVertexShader)
843{
844 const struct wined3d_gl_info *gl_info = context->gl_info;
845 IWineD3DDeviceImpl *device = context_get_device(context);
846 IWineD3DStateBlockImpl* stateBlock = device->stateBlock;
847 struct shader_glsl_priv *priv = device->shader_priv;
848
849 GLhandleARB programId;
850 struct glsl_shader_prog_link *prog = priv->glsl_program;
851 UINT constant_version;
852 int i;
853
854 if (!prog) {
855 /* No GLSL program set - nothing to do. */
856 return;
857 }
858 programId = prog->programId;
859 constant_version = prog->constant_version;
860
861 if (useVertexShader) {
862 IWineD3DBaseShaderImpl* vshader = (IWineD3DBaseShaderImpl*) stateBlock->vertexShader;
863
864 /* Load DirectX 9 float constants/uniforms for vertex shader */
865 shader_glsl_load_constantsF(vshader, gl_info, stateBlock->vertexShaderConstantF,
866 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
867
868 /* Load DirectX 9 integer constants/uniforms for vertex shader */
869 shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations, stateBlock->vertexShaderConstantI,
870 stateBlock->changed.vertexShaderConstantsI & vshader->baseShader.reg_maps.integer_constants);
871
872 /* Load DirectX 9 boolean constants/uniforms for vertex shader */
873 shader_glsl_load_constantsB(vshader, gl_info, programId, stateBlock->vertexShaderConstantB,
874 stateBlock->changed.vertexShaderConstantsB & vshader->baseShader.reg_maps.boolean_constants);
875
876 /* Upload the position fixup params */
877 GL_EXTCALL(glUniform4fvARB(prog->posFixup_location, 1, &device->posFixup[0]));
878 checkGLcall("glUniform4fvARB");
879 }
880
881 if (usePixelShader) {
882
883 IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;
884
885 /* Load DirectX 9 float constants/uniforms for pixel shader */
886 shader_glsl_load_constantsF(pshader, gl_info, stateBlock->pixelShaderConstantF,
887 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
888
889 /* Load DirectX 9 integer constants/uniforms for pixel shader */
890 shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations, stateBlock->pixelShaderConstantI,
891 stateBlock->changed.pixelShaderConstantsI & pshader->baseShader.reg_maps.integer_constants);
892
893 /* Load DirectX 9 boolean constants/uniforms for pixel shader */
894 shader_glsl_load_constantsB(pshader, gl_info, programId, stateBlock->pixelShaderConstantB,
895 stateBlock->changed.pixelShaderConstantsB & pshader->baseShader.reg_maps.boolean_constants);
896
897 /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from.
898 * It can't be 0 for a valid texbem instruction.
899 */
900 for(i = 0; i < MAX_TEXTURES; i++) {
901 const float *data;
902
903 if(prog->bumpenvmat_location[i] == -1) continue;
904
905 data = (const float *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVMAT00];
906 GL_EXTCALL(glUniformMatrix2fvARB(prog->bumpenvmat_location[i], 1, 0, data));
907 checkGLcall("glUniformMatrix2fvARB");
908
909 /* texbeml needs the luminance scale and offset too. If texbeml is used, needsbumpmat
910 * is set too, so we can check that in the needsbumpmat check
911 */
912 if(prog->luminancescale_location[i] != -1) {
913 const GLfloat *scale = (const GLfloat *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVLSCALE];
914 const GLfloat *offset = (const GLfloat *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVLOFFSET];
915
916 GL_EXTCALL(glUniform1fvARB(prog->luminancescale_location[i], 1, scale));
917 checkGLcall("glUniform1fvARB");
918 GL_EXTCALL(glUniform1fvARB(prog->luminanceoffset_location[i], 1, offset));
919 checkGLcall("glUniform1fvARB");
920 }
921 }
922
923 if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {
924 float correction_params[4];
925
926 if (context->render_offscreen)
927 {
928 correction_params[0] = 0.0f;
929 correction_params[1] = 1.0f;
930 } else {
931 /* position is window relative, not viewport relative */
932#ifdef VBOX_WITH_VMSVGA
933 correction_params[0] = device->rtHeight;
934#else
935 correction_params[0] = ((IWineD3DSurfaceImpl *)context->current_rt)->currentDesc.Height;
936#endif
937 correction_params[1] = -1.0f;
938 }
939 GL_EXTCALL(glUniform4fvARB(prog->ycorrection_location, 1, correction_params));
940 }
941 }
942
943 if (priv->next_constant_version == UINT_MAX)
944 {
945 TRACE("Max constant version reached, resetting to 0.\n");
946 wine_rb_for_each_entry(&priv->program_lookup, reset_program_constant_version, NULL);
947 priv->next_constant_version = 1;
948 }
949 else
950 {
951 prog->constant_version = priv->next_constant_version++;
952 }
953}
954
955static inline void update_heap_entry(struct constant_heap *heap, unsigned int idx,
956 unsigned int heap_idx, DWORD new_version)
957{
958 struct constant_entry *entries = heap->entries;
959 unsigned int *positions = heap->positions;
960 unsigned int parent_idx;
961
962 while (heap_idx > 1)
963 {
964 parent_idx = heap_idx >> 1;
965
966 if (new_version <= entries[parent_idx].version) break;
967
968 entries[heap_idx] = entries[parent_idx];
969 positions[entries[parent_idx].idx] = heap_idx;
970 heap_idx = parent_idx;
971 }
972
973 entries[heap_idx].version = new_version;
974 entries[heap_idx].idx = idx;
975 positions[idx] = heap_idx;
976}
977
978static void shader_glsl_update_float_vertex_constants(IWineD3DDevice *iface, UINT start, UINT count)
979{
980 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
981 struct shader_glsl_priv *priv = This->shader_priv;
982 struct constant_heap *heap = &priv->vconst_heap;
983 UINT i;
984
985 for (i = start; i < count + start; ++i)
986 {
987 if (!This->stateBlock->changed.vertexShaderConstantsF[i])
988 update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
989 else
990 update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
991 }
992}
993
994static void shader_glsl_update_float_pixel_constants(IWineD3DDevice *iface, UINT start, UINT count)
995{
996 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
997 struct shader_glsl_priv *priv = This->shader_priv;
998 struct constant_heap *heap = &priv->pconst_heap;
999 UINT i;
1000
1001 for (i = start; i < count + start; ++i)
1002 {
1003 if (!This->stateBlock->changed.pixelShaderConstantsF[i])
1004 update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
1005 else
1006 update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
1007 }
1008}
1009
1010static unsigned int vec4_varyings(DWORD shader_major, const struct wined3d_gl_info *gl_info)
1011{
1012 unsigned int ret = gl_info->limits.glsl_varyings / 4;
1013 /* 4.0 shaders do not write clip coords because d3d10 does not support user clipplanes */
1014 if(shader_major > 3) return ret;
1015
1016 /* 3.0 shaders may need an extra varying for the clip coord on some cards(mostly dx10 ones) */
1017 if (gl_info->quirks & WINED3D_QUIRK_GLSL_CLIP_VARYING) ret -= 1;
1018 return ret;
1019}
1020
1021/** Generate the variable & register declarations for the GLSL output target */
1022static void shader_generate_glsl_declarations(const struct wined3d_context *context,
1023 struct wined3d_shader_buffer *buffer, IWineD3DBaseShader *iface,
1024 const shader_reg_maps *reg_maps, struct shader_glsl_ctx_priv *ctx_priv)
1025{
1026 IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
1027 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
1028 const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args;
1029 const struct wined3d_gl_info *gl_info = context->gl_info;
1030 unsigned int i, extra_constants_needed = 0;
1031 const local_constant *lconst;
1032 DWORD map;
1033
1034 /* There are some minor differences between pixel and vertex shaders */
1035 char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
1036 char prefix = pshader ? 'P' : 'V';
1037
1038 /* Prototype the subroutines */
1039 for (i = 0, map = reg_maps->labels; map; map >>= 1, ++i)
1040 {
1041 if (map & 1) shader_addline(buffer, "void subroutine%u();\n", i);
1042 }
1043
1044 /* Declare the constants (aka uniforms) */
1045 if (This->baseShader.limits.constant_float > 0) {
1046 unsigned max_constantsF;
1047 /* Unless the shader uses indirect addressing, always declare the maximum array size and ignore that we need some
1048 * uniforms privately. E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup and immediate values, still
1049 * declare VC[256]. If the shader needs more uniforms than we have it won't work in any case. If it uses less, the
1050 * compiler will figure out which uniforms are really used and strip them out. This allows a shader to use c255 on
1051 * a dx9 card, as long as it doesn't also use all the other constants.
1052 *
1053 * If the shader uses indirect addressing the compiler must assume that all declared uniforms are used. In this case,
1054 * declare only the amount that we're assured to have.
1055 *
1056 * Thus we run into problems in these two cases:
1057 * 1) The shader really uses more uniforms than supported
1058 * 2) The shader uses indirect addressing, less constants than supported, but uses a constant index > #supported consts
1059 */
1060 if (pshader)
1061 {
1062 /* No indirect addressing here. */
1063 max_constantsF = gl_info->limits.glsl_ps_float_constants;
1064 }
1065 else
1066 {
1067 if(This->baseShader.reg_maps.usesrelconstF) {
1068 /* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix).
1069 * Subtract another uniform for immediate values, which have to be loaded via uniform by the driver as well.
1070 * The shader code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex shader code, so one vec4 should be enough
1071 * (Unfortunately the Nvidia driver doesn't store 128 and -128 in one float).
1072 *
1073 * Writing gl_ClipVertex requires one uniform for each clipplane as well.
1074 */
1075#ifdef VBOX_WITH_WDDM
1076 if (gl_info->limits.glsl_vs_float_constants == 256)
1077 {
1078 DWORD dwVersion = GetVersion();
1079 DWORD dwMajor = (DWORD)(LOBYTE(LOWORD(dwVersion)));
1080 DWORD dwMinor = (DWORD)(HIBYTE(LOWORD(dwVersion)));
1081 /* tmp workaround Win8 Aero requirement for 256 */
1082 if (dwMajor > 6 || dwMinor > 1)
1083 {
1084 /* tmp work-around to make Internet Explorer in win8 work with GPU supporting only with 256 shader uniform vars
1085 * @todo: make it more robust */
1086 max_constantsF = gl_info->limits.glsl_vs_float_constants - 1;
1087 }
1088 else
1089 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
1090 }
1091 else
1092#endif
1093 {
1094 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
1095 }
1096
1097 if(ctx_priv->cur_vs_args->clip_enabled)
1098 {
1099 max_constantsF -= gl_info->limits.clipplanes;
1100 }
1101 max_constantsF -= count_bits(This->baseShader.reg_maps.integer_constants);
1102 /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
1103 * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
1104 * for now take this into account when calculating the number of available constants
1105 */
1106 max_constantsF -= count_bits(This->baseShader.reg_maps.boolean_constants);
1107 /* Set by driver quirks in directx.c */
1108 max_constantsF -= gl_info->reserved_glsl_constants;
1109 }
1110 else
1111 {
1112 max_constantsF = gl_info->limits.glsl_vs_float_constants;
1113 }
1114 }
1115 max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF);
1116 shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
1117 }
1118
1119 /* Always declare the full set of constants, the compiler can remove the unused ones because d3d doesn't(yet)
1120 * support indirect int and bool constant addressing. This avoids problems if the app uses e.g. i0 and i9.
1121 */
1122 if (This->baseShader.limits.constant_int > 0 && This->baseShader.reg_maps.integer_constants)
1123 shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);
1124
1125 if (This->baseShader.limits.constant_bool > 0 && This->baseShader.reg_maps.boolean_constants)
1126 shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);
1127
1128 if(!pshader) {
1129 shader_addline(buffer, "uniform vec4 posFixup;\n");
1130 /* Predeclaration; This function is added at link time based on the pixel shader.
1131 * VS 3.0 shaders have an array OUT[] the shader writes to, earlier versions don't have
1132 * that. We know the input to the reorder function at vertex shader compile time, so
1133 * we can deal with that. The reorder function for a 1.x and 2.x vertex shader can just
1134 * read gl_FrontColor. The output depends on the pixel shader. The reorder function for a
1135 * 1.x and 2.x pshader or for fixed function will write gl_FrontColor, and for a 3.0 shader
1136 * it will write to the varying array. Here we depend on the shader optimizer on sorting that
1137 * out. The nvidia driver only does that if the parameter is inout instead of out, hence the
1138 * inout.
1139 */
1140 if (reg_maps->shader_version.major >= 3)
1141 {
1142 shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", MAX_REG_OUTPUT);
1143 } else {
1144 shader_addline(buffer, "void order_ps_input();\n");
1145 }
1146 } else {
1147 for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
1148 {
1149 if (!(map & 1)) continue;
1150
1151 shader_addline(buffer, "uniform mat2 bumpenvmat%d;\n", i);
1152
1153 if (reg_maps->luminanceparams & (1 << i))
1154 {
1155 shader_addline(buffer, "uniform float luminancescale%d;\n", i);
1156 shader_addline(buffer, "uniform float luminanceoffset%d;\n", i);
1157 extra_constants_needed++;
1158 }
1159
1160 extra_constants_needed++;
1161 }
1162
1163 if (ps_args->srgb_correction)
1164 {
1165 shader_addline(buffer, "const vec4 srgb_const0 = vec4(%.8e, %.8e, %.8e, %.8e);\n",
1166 srgb_pow, srgb_mul_high, srgb_sub_high, srgb_mul_low);
1167 shader_addline(buffer, "const vec4 srgb_const1 = vec4(%.8e, 0.0, 0.0, 0.0);\n",
1168 srgb_cmp);
1169 }
1170 if (reg_maps->vpos || reg_maps->usesdsy)
1171 {
1172 if (This->baseShader.limits.constant_float + extra_constants_needed
1173 + 1 < gl_info->limits.glsl_ps_float_constants)
1174 {
1175 shader_addline(buffer, "uniform vec4 ycorrection;\n");
1176 ((IWineD3DPixelShaderImpl *) This)->vpos_uniform = 1;
1177 extra_constants_needed++;
1178 } else {
1179 /* This happens because we do not have proper tracking of the constant registers that are
1180 * actually used, only the max limit of the shader version
1181 */
1182 FIXME("Cannot find a free uniform for vpos correction params\n");
1183 AssertFailed();
1184 shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n",
1185 context->render_offscreen ? 0.0f : ((IWineD3DSurfaceImpl *)device->render_targets[0])->currentDesc.Height,
1186 context->render_offscreen ? 1.0f : -1.0f);
1187 }
1188 shader_addline(buffer, "vec4 vpos;\n");
1189 }
1190 }
1191
1192 /* Declare texture samplers */
1193 for (i = 0; i < This->baseShader.limits.sampler; i++) {
1194 if (reg_maps->sampler_type[i])
1195 {
1196 switch (reg_maps->sampler_type[i])
1197 {
1198 case WINED3DSTT_1D:
1199 shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i);
1200 break;
1201 case WINED3DSTT_2D:
1202 if(device->stateBlock->textures[i] &&
1203 IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[i]) == GL_TEXTURE_RECTANGLE_ARB) {
1204 shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i);
1205 } else {
1206 shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
1207 }
1208 break;
1209 case WINED3DSTT_CUBE:
1210 shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
1211 break;
1212 case WINED3DSTT_VOLUME:
1213 shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i);
1214 break;
1215 default:
1216 shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i);
1217 FIXME("Unrecognized sampler type: %#x\n", reg_maps->sampler_type[i]);
1218 break;
1219 }
1220 }
1221 }
1222
1223 /* Declare uniforms for NP2 texcoord fixup:
1224 * This is NOT done inside the loop that declares the texture samplers since the NP2 fixup code
1225 * is currently only used for the GeforceFX series and when forcing the ARB_npot extension off.
1226 * Modern cards just skip the code anyway, so put it inside a separate loop. */
1227 if (pshader && ps_args->np2_fixup) {
1228
1229 struct ps_np2fixup_info* const fixup = ctx_priv->cur_np2fixup_info;
1230 UINT cur = 0;
1231
1232 /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height]
1233 * while D3D has them in the (normalized) [0,1]x[0,1] range.
1234 * samplerNP2Fixup stores texture dimensions and is updated through
1235 * shader_glsl_load_np2fixup_constants when the sampler changes. */
1236
1237 for (i = 0; i < This->baseShader.limits.sampler; ++i) {
1238 if (reg_maps->sampler_type[i]) {
1239 if (!(ps_args->np2_fixup & (1 << i))) continue;
1240
1241 if (WINED3DSTT_2D != reg_maps->sampler_type[i]) {
1242 FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n");
1243 continue;
1244 }
1245
1246 fixup->idx[i] = cur++;
1247 }
1248 }
1249
1250 fixup->num_consts = (cur + 1) >> 1;
1251 shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup[%u];\n", prefix, fixup->num_consts);
1252 }
1253
1254 /* Declare address variables */
1255 for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
1256 {
1257 if (map & 1) shader_addline(buffer, "ivec4 A%u;\n", i);
1258 }
1259
1260 /* Declare texture coordinate temporaries and initialize them */
1261 for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
1262 {
1263 if (map & 1) shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i);
1264 }
1265
1266 /* Declare input register varyings. Only pixel shader, vertex shaders have that declared in the
1267 * helper function shader that is linked in at link time
1268 */
1269 if (pshader && reg_maps->shader_version.major >= 3)
1270 {
1271 if (use_vs(device->stateBlock))
1272 {
1273 shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(reg_maps->shader_version.major, gl_info));
1274 } else {
1275 /* TODO: Write a replacement shader for the fixed function vertex pipeline, so this isn't needed.
1276 * For fixed function vertex processing + 3.0 pixel shader we need a separate function in the
1277 * pixel shader that reads the fixed function color into the packed input registers.
1278 */
1279 shader_addline(buffer, "vec4 IN[%u];\n", vec4_varyings(reg_maps->shader_version.major, gl_info));
1280 }
1281 }
1282
1283 /* Declare output register temporaries */
1284 if(This->baseShader.limits.packed_output) {
1285 shader_addline(buffer, "vec4 OUT[%u];\n", This->baseShader.limits.packed_output);
1286 }
1287
1288 /* Declare temporary variables */
1289 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
1290 {
1291 if (map & 1) shader_addline(buffer, "vec4 R%u;\n", i);
1292 }
1293
1294 /* Declare attributes */
1295 if (reg_maps->shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1296 {
1297 for (i = 0, map = reg_maps->input_registers; map; map >>= 1, ++i)
1298 {
1299 if (map & 1) shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
1300 }
1301 }
1302
1303 /* Declare loop registers aLx */
1304 for (i = 0; i < reg_maps->loop_depth; i++) {
1305 shader_addline(buffer, "int aL%u;\n", i);
1306 shader_addline(buffer, "int tmpInt%u;\n", i);
1307 }
1308
1309 /* Temporary variables for matrix operations */
1310 shader_addline(buffer, "vec4 tmp0;\n");
1311 shader_addline(buffer, "vec4 tmp1;\n");
1312
1313 /* Local constants use a different name so they can be loaded once at shader link time
1314 * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
1315 * float -> string conversion can cause precision loss.
1316 */
1317 if(!This->baseShader.load_local_constsF) {
1318 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
1319 shader_addline(buffer, "uniform vec4 %cLC%u;\n", prefix, lconst->idx);
1320 }
1321 }
1322
1323 shader_addline(buffer, "const float FLT_MAX = 1e38;\n");
1324
1325 /* Start the main program */
1326 shader_addline(buffer, "void main() {\n");
1327 if(pshader && reg_maps->vpos) {
1328 /* DirectX apps expect integer values, while OpenGL drivers add approximately 0.5. This causes
1329 * off-by-one problems as spotted by the vPos d3d9 visual test. Unfortunately the ATI cards do
1330 * not add exactly 0.5, but rather something like 0.49999999 or 0.50000001, which still causes
1331 * precision troubles when we just substract 0.5.
1332 *
1333 * To deal with that just floor() the position. This will eliminate the fraction on all cards.
1334 *
1335 * TODO: Test how that behaves with multisampling once we can enable multisampling in winex11.
1336 *
1337 * An advantage of floor is that it works even if the driver doesn't add 1/2. It is somewhat
1338 * questionable if 1.5, 2.5, ... are the proper values to return in gl_FragCoord, even though
1339 * coordinates specify the pixel centers instead of the pixel corners. This code will behave
1340 * correctly on drivers that returns integer values.
1341 */
1342 shader_addline(buffer, "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n");
1343 }
1344}
1345
1346/*****************************************************************************
1347 * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
1348 *
1349 * For more information, see http://wiki.winehq.org/DirectX-Shaders
1350 ****************************************************************************/
1351
1352/* Prototypes */
1353static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1354 const struct wined3d_shader_src_param *wined3d_src, DWORD mask, glsl_src_param_t *glsl_src);
1355
/** GLSL multiplier prefixes for the destination shift (result scale) modifier.
 *
 * Indexed by the raw shift field of a destination parameter. Each non-empty
 * entry is spliced in front of the opening parenthesis of the generated
 * expression, so the whole result is multiplied by that factor. Slots 6 to 11
 * are empty strings: no scaling is emitted for those shift values. */
static const char * const shift_glsl_tab[] = {
    /* Multiplying shifts */
    "",           /*  0: no shift   */
    "2.0 * ",     /*  1: times 2    */
    "4.0 * ",     /*  2: times 4    */
    "8.0 * ",     /*  3: times 8    */
    "16.0 * ",    /*  4: times 16   */
    "32.0 * ",    /*  5: times 32   */
    "",           /*  6: times 64,  no scaling emitted */
    "",           /*  7: times 128, no scaling emitted */
    /* Dividing shifts */
    "",           /*  8: div by 256, no scaling emitted */
    "",           /*  9: div by 128, no scaling emitted */
    "",           /* 10: div by 64,  no scaling emitted */
    "",           /* 11: div by 32,  no scaling emitted */
    "0.0625 * ",  /* 12: div by 16  */
    "0.125 * ",   /* 13: div by 8   */
    "0.25 * ",    /* 14: div by 4   */
    "0.5 * "      /* 15: div by 2   */
};
1375
1376/* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
1377static void shader_glsl_gen_modifier(DWORD src_modifier, const char *in_reg, const char *in_regswizzle, char *out_str)
1378{
1379 out_str[0] = 0;
1380
1381 switch (src_modifier)
1382 {
1383 case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
1384 case WINED3DSPSM_DW:
1385 case WINED3DSPSM_NONE:
1386 sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1387 break;
1388 case WINED3DSPSM_NEG:
1389 sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
1390 break;
1391 case WINED3DSPSM_NOT:
1392 sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
1393 break;
1394 case WINED3DSPSM_BIAS:
1395 sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1396 break;
1397 case WINED3DSPSM_BIASNEG:
1398 sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1399 break;
1400 case WINED3DSPSM_SIGN:
1401 sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1402 break;
1403 case WINED3DSPSM_SIGNNEG:
1404 sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1405 break;
1406 case WINED3DSPSM_COMP:
1407 sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
1408 break;
1409 case WINED3DSPSM_X2:
1410 sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
1411 break;
1412 case WINED3DSPSM_X2NEG:
1413 sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
1414 break;
1415 case WINED3DSPSM_ABS:
1416 sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
1417 break;
1418 case WINED3DSPSM_ABSNEG:
1419 sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
1420 break;
1421 default:
1422 FIXME("Unhandled modifier %u\n", src_modifier);
1423 sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1424 }
1425}
1426
1427/** Writes the GLSL variable name that corresponds to the register that the
1428 * DX opcode parameter is trying to access */
1429static void shader_glsl_get_register_name(const struct wined3d_shader_register *reg,
1430 char *register_name, BOOL *is_color, const struct wined3d_shader_instruction *ins)
1431{
1432 /* oPos, oFog and oPts in D3D */
1433 static const char * const hwrastout_reg_names[] = { "gl_Position", "gl_FogFragCoord", "gl_PointSize" };
1434
1435 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
1436 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
1437 char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
1438
1439 *is_color = FALSE;
1440
1441 switch (reg->type)
1442 {
1443 case WINED3DSPR_TEMP:
1444 sprintf(register_name, "R%u", reg->idx);
1445 break;
1446
1447 case WINED3DSPR_INPUT:
1448 /* vertex shaders */
1449 if (!pshader)
1450 {
1451 struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
1452 if (priv->cur_vs_args->swizzle_map & (1 << reg->idx)) *is_color = TRUE;
1453 sprintf(register_name, "attrib%u", reg->idx);
1454 break;
1455 }
1456
1457 /* pixel shaders >= 3.0 */
1458 if (This->baseShader.reg_maps.shader_version.major >= 3)
1459 {
1460 DWORD idx = ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg->idx];
1461 unsigned int in_count = vec4_varyings(This->baseShader.reg_maps.shader_version.major, gl_info);
1462
1463 if (reg->rel_addr)
1464 {
1465 glsl_src_param_t rel_param;
1466
1467 shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
1468
1469 /* Removing a + 0 would be an obvious optimization, but macos doesn't see the NOP
1470 * operation there */
1471 if (idx)
1472 {
1473 if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count)
1474 {
1475 sprintf(register_name,
1476 "((%s + %u) > %d ? (%s + %u) > %d ? gl_SecondaryColor : gl_Color : IN[%s + %u])",
1477 rel_param.param_str, idx, in_count - 1, rel_param.param_str, idx, in_count,
1478 rel_param.param_str, idx);
1479 }
1480 else
1481 {
1482 sprintf(register_name, "IN[%s + %u]", rel_param.param_str, idx);
1483 }
1484 }
1485 else
1486 {
1487 if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count)
1488 {
1489 sprintf(register_name, "((%s) > %d ? (%s) > %d ? gl_SecondaryColor : gl_Color : IN[%s])",
1490 rel_param.param_str, in_count - 1, rel_param.param_str, in_count,
1491 rel_param.param_str);
1492 }
1493 else
1494 {
1495 sprintf(register_name, "IN[%s]", rel_param.param_str);
1496 }
1497 }
1498 }
1499 else
1500 {
1501 if (idx == in_count) sprintf(register_name, "gl_Color");
1502 else if (idx == in_count + 1) sprintf(register_name, "gl_SecondaryColor");
1503 else sprintf(register_name, "IN[%u]", idx);
1504 }
1505 }
1506 else
1507 {
1508 if (reg->idx == 0) strcpy(register_name, "gl_Color");
1509 else strcpy(register_name, "gl_SecondaryColor");
1510 break;
1511 }
1512 break;
1513
1514 case WINED3DSPR_CONST:
1515 {
1516 const char prefix = pshader ? 'P' : 'V';
1517
1518 /* Relative addressing */
1519 if (reg->rel_addr)
1520 {
1521 glsl_src_param_t rel_param;
1522 shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
1523 if (reg->idx) sprintf(register_name, "%cC[%s + %u]", prefix, rel_param.param_str, reg->idx);
1524 else sprintf(register_name, "%cC[%s]", prefix, rel_param.param_str);
1525 }
1526 else
1527 {
1528 if (shader_constant_is_local(This, reg->idx))
1529 sprintf(register_name, "%cLC%u", prefix, reg->idx);
1530 else
1531 sprintf(register_name, "%cC[%u]", prefix, reg->idx);
1532 }
1533 }
1534 break;
1535
1536 case WINED3DSPR_CONSTINT:
1537 if (pshader) sprintf(register_name, "PI[%u]", reg->idx);
1538 else sprintf(register_name, "VI[%u]", reg->idx);
1539 break;
1540
1541 case WINED3DSPR_CONSTBOOL:
1542 if (pshader) sprintf(register_name, "PB[%u]", reg->idx);
1543 else sprintf(register_name, "VB[%u]", reg->idx);
1544 break;
1545
1546 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1547 if (pshader) sprintf(register_name, "T%u", reg->idx);
1548 else sprintf(register_name, "A%u", reg->idx);
1549 break;
1550
1551 case WINED3DSPR_LOOP:
1552 sprintf(register_name, "aL%u", This->baseShader.cur_loop_regno - 1);
1553 break;
1554
1555 case WINED3DSPR_SAMPLER:
1556 if (pshader) sprintf(register_name, "Psampler%u", reg->idx);
1557 else sprintf(register_name, "Vsampler%u", reg->idx);
1558 break;
1559
1560 case WINED3DSPR_COLOROUT:
1561 if (reg->idx >= gl_info->limits.buffers)
1562 WARN("Write to render target %u, only %d supported.\n", reg->idx, gl_info->limits.buffers);
1563
1564 sprintf(register_name, "gl_FragData[%u]", reg->idx);
1565 break;
1566
1567 case WINED3DSPR_RASTOUT:
1568 sprintf(register_name, "%s", hwrastout_reg_names[reg->idx]);
1569 break;
1570
1571 case WINED3DSPR_DEPTHOUT:
1572 sprintf(register_name, "gl_FragDepth");
1573 break;
1574
1575 case WINED3DSPR_ATTROUT:
1576 if (reg->idx == 0) sprintf(register_name, "gl_FrontColor");
1577 else sprintf(register_name, "gl_FrontSecondaryColor");
1578 break;
1579
1580 case WINED3DSPR_TEXCRDOUT:
1581 /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
1582 if (This->baseShader.reg_maps.shader_version.major >= 3) sprintf(register_name, "OUT[%u]", reg->idx);
1583 else sprintf(register_name, "gl_TexCoord[%u]", reg->idx);
1584 break;
1585
1586 case WINED3DSPR_MISCTYPE:
1587 if (reg->idx == 0)
1588 {
1589 /* vPos */
1590 sprintf(register_name, "vpos");
1591 }
1592 else if (reg->idx == 1)
1593 {
1594 /* Note that gl_FrontFacing is a bool, while vFace is
1595 * a float for which the sign determines front/back */
1596 sprintf(register_name, "(gl_FrontFacing ? 1.0 : -1.0)");
1597 }
1598 else
1599 {
1600 FIXME("Unhandled misctype register %d\n", reg->idx);
1601 sprintf(register_name, "unrecognized_register");
1602 }
1603 break;
1604
1605 case WINED3DSPR_IMMCONST:
1606 switch (reg->immconst_type)
1607 {
1608 case WINED3D_IMMCONST_FLOAT:
1609 sprintf(register_name, "%.8e", *(const float *)reg->immconst_data);
1610 break;
1611
1612 case WINED3D_IMMCONST_FLOAT4:
1613 sprintf(register_name, "vec4(%.8e, %.8e, %.8e, %.8e)",
1614 *(const float *)&reg->immconst_data[0], *(const float *)&reg->immconst_data[1],
1615 *(const float *)&reg->immconst_data[2], *(const float *)&reg->immconst_data[3]);
1616 break;
1617
1618 default:
1619 FIXME("Unhandled immconst type %#x\n", reg->immconst_type);
1620 sprintf(register_name, "<unhandled_immconst_type %#x>", reg->immconst_type);
1621 }
1622 break;
1623
1624 default:
1625 FIXME("Unhandled register name Type(%d)\n", reg->type);
1626 sprintf(register_name, "unrecognized_register");
1627 break;
1628 }
1629}
1630
1631static void shader_glsl_write_mask_to_str(DWORD write_mask, char *str)
1632{
1633 *str++ = '.';
1634 if (write_mask & WINED3DSP_WRITEMASK_0) *str++ = 'x';
1635 if (write_mask & WINED3DSP_WRITEMASK_1) *str++ = 'y';
1636 if (write_mask & WINED3DSP_WRITEMASK_2) *str++ = 'z';
1637 if (write_mask & WINED3DSP_WRITEMASK_3) *str++ = 'w';
1638 *str = '\0';
1639}
1640
1641/* Get the GLSL write mask for the destination register */
1642static DWORD shader_glsl_get_write_mask(const struct wined3d_shader_dst_param *param, char *write_mask)
1643{
1644 DWORD mask = param->write_mask;
1645
1646 if (shader_is_scalar(&param->reg))
1647 {
1648 mask = WINED3DSP_WRITEMASK_0;
1649 *write_mask = '\0';
1650 }
1651 else
1652 {
1653 shader_glsl_write_mask_to_str(mask, write_mask);
1654 }
1655
1656 return mask;
1657}
1658
1659static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) {
1660 unsigned int size = 0;
1661
1662 if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
1663 if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
1664 if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
1665 if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
1666
1667 return size;
1668}
1669
1670static void shader_glsl_swizzle_to_str(const DWORD swizzle, BOOL fixup, DWORD mask, char *str)
1671{
1672 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
1673 * but addressed as "rgba". To fix this we need to swap the register's x
1674 * and z components. */
1675 const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
1676
1677 *str++ = '.';
1678 /* swizzle bits fields: wwzzyyxx */
1679 if (mask & WINED3DSP_WRITEMASK_0) *str++ = swizzle_chars[swizzle & 0x03];
1680 if (mask & WINED3DSP_WRITEMASK_1) *str++ = swizzle_chars[(swizzle >> 2) & 0x03];
1681 if (mask & WINED3DSP_WRITEMASK_2) *str++ = swizzle_chars[(swizzle >> 4) & 0x03];
1682 if (mask & WINED3DSP_WRITEMASK_3) *str++ = swizzle_chars[(swizzle >> 6) & 0x03];
1683 *str = '\0';
1684}
1685
1686static void shader_glsl_get_swizzle(const struct wined3d_shader_src_param *param,
1687 BOOL fixup, DWORD mask, char *swizzle_str)
1688{
1689 if (shader_is_scalar(&param->reg))
1690 *swizzle_str = '\0';
1691 else
1692 shader_glsl_swizzle_to_str(param->swizzle, fixup, mask, swizzle_str);
1693}
1694
1695/* From a given parameter token, generate the corresponding GLSL string.
1696 * Also, return the actual register name and swizzle in case the
1697 * caller needs this information as well. */
1698static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1699 const struct wined3d_shader_src_param *wined3d_src, DWORD mask, glsl_src_param_t *glsl_src)
1700{
1701 BOOL is_color = FALSE;
1702 char swizzle_str[6];
1703
1704 glsl_src->reg_name[0] = '\0';
1705 glsl_src->param_str[0] = '\0';
1706 swizzle_str[0] = '\0';
1707
1708 shader_glsl_get_register_name(&wined3d_src->reg, glsl_src->reg_name, &is_color, ins);
1709 shader_glsl_get_swizzle(wined3d_src, is_color, mask, swizzle_str);
1710 shader_glsl_gen_modifier(wined3d_src->modifiers, glsl_src->reg_name, swizzle_str, glsl_src->param_str);
1711}
1712
1713/* From a given parameter token, generate the corresponding GLSL string.
1714 * Also, return the actual register name and swizzle in case the
1715 * caller needs this information as well. */
1716static DWORD shader_glsl_add_dst_param(const struct wined3d_shader_instruction *ins,
1717 const struct wined3d_shader_dst_param *wined3d_dst, glsl_dst_param_t *glsl_dst)
1718{
1719 BOOL is_color = FALSE;
1720
1721 glsl_dst->mask_str[0] = '\0';
1722 glsl_dst->reg_name[0] = '\0';
1723
1724 shader_glsl_get_register_name(&wined3d_dst->reg, glsl_dst->reg_name, &is_color, ins);
1725 return shader_glsl_get_write_mask(wined3d_dst, glsl_dst->mask_str);
1726}
1727
1728/* Append the destination part of the instruction to the buffer, return the effective write mask */
1729static DWORD shader_glsl_append_dst_ext(struct wined3d_shader_buffer *buffer,
1730 const struct wined3d_shader_instruction *ins, const struct wined3d_shader_dst_param *dst)
1731{
1732 glsl_dst_param_t glsl_dst;
1733 DWORD mask;
1734
1735 mask = shader_glsl_add_dst_param(ins, dst, &glsl_dst);
1736 if (mask) shader_addline(buffer, "%s%s = %s(", glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
1737
1738 return mask;
1739}
1740
1741/* Append the destination part of the instruction to the buffer, return the effective write mask */
1742static DWORD shader_glsl_append_dst(struct wined3d_shader_buffer *buffer, const struct wined3d_shader_instruction *ins)
1743{
1744 return shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
1745}
1746
1747/** Process GLSL instruction modifiers */
1748static void shader_glsl_add_instruction_modifiers(const struct wined3d_shader_instruction *ins)
1749{
1750 glsl_dst_param_t dst_param;
1751 DWORD modifiers;
1752
1753 if (!ins->dst_count) return;
1754
1755 modifiers = ins->dst[0].modifiers;
1756 if (!modifiers) return;
1757
1758 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
1759
1760 if (modifiers & WINED3DSPDM_SATURATE)
1761 {
1762 /* _SAT means to clamp the value of the register to between 0 and 1 */
1763 shader_addline(ins->ctx->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
1764 dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
1765 }
1766
1767 if (modifiers & WINED3DSPDM_MSAMPCENTROID)
1768 {
1769 FIXME("_centroid modifier not handled\n");
1770 }
1771
1772 if (modifiers & WINED3DSPDM_PARTIALPRECISION)
1773 {
1774 /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
1775 }
1776}
1777
1778static inline const char *shader_get_comp_op(DWORD op)
1779{
1780 switch (op) {
1781 case COMPARISON_GT: return ">";
1782 case COMPARISON_EQ: return "==";
1783 case COMPARISON_GE: return ">=";
1784 case COMPARISON_LT: return "<";
1785 case COMPARISON_NE: return "!=";
1786 case COMPARISON_LE: return "<=";
1787 default:
1788 FIXME("Unrecognized comparison value: %u\n", op);
1789 return "(\?\?)";
1790 }
1791}
1792
1793static void shader_glsl_get_sample_function(const struct wined3d_gl_info *gl_info,
1794 DWORD sampler_type, DWORD flags, glsl_sample_function_t *sample_function)
1795{
1796 BOOL projected = flags & WINED3D_GLSL_SAMPLE_PROJECTED;
1797 BOOL texrect = flags & WINED3D_GLSL_SAMPLE_RECT;
1798 BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
1799 BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD;
1800
1801 /* Note that there's no such thing as a projected cube texture. */
1802 switch(sampler_type) {
1803 case WINED3DSTT_1D:
1804 if(lod) {
1805 sample_function->name = projected ? "texture1DProjLod" : "texture1DLod";
1806 }
1807 else if (grad)
1808 {
1809 if (gl_info->supported[EXT_GPU_SHADER4])
1810 sample_function->name = projected ? "texture1DProjGrad" : "texture1DGrad";
1811 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1812 sample_function->name = projected ? "texture1DProjGradARB" : "texture1DGradARB";
1813 else
1814 {
1815 FIXME("Unsupported 1D grad function.\n");
1816 sample_function->name = "unsupported1DGrad";
1817 }
1818 }
1819 else
1820 {
1821 sample_function->name = projected ? "texture1DProj" : "texture1D";
1822 }
1823 sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
1824 break;
1825 case WINED3DSTT_2D:
1826 if(texrect) {
1827 if(lod) {
1828 sample_function->name = projected ? "texture2DRectProjLod" : "texture2DRectLod";
1829 }
1830 else if (grad)
1831 {
1832 if (gl_info->supported[EXT_GPU_SHADER4])
1833 sample_function->name = projected ? "texture2DRectProjGrad" : "texture2DRectGrad";
1834 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1835 sample_function->name = projected ? "texture2DRectProjGradARB" : "texture2DRectGradARB";
1836 else
1837 {
1838 FIXME("Unsupported RECT grad function.\n");
1839 sample_function->name = "unsupported2DRectGrad";
1840 }
1841 }
1842 else
1843 {
1844 sample_function->name = projected ? "texture2DRectProj" : "texture2DRect";
1845 }
1846 } else {
1847 if(lod) {
1848 sample_function->name = projected ? "texture2DProjLod" : "texture2DLod";
1849 }
1850 else if (grad)
1851 {
1852 if (gl_info->supported[EXT_GPU_SHADER4])
1853 sample_function->name = projected ? "texture2DProjGrad" : "texture2DGrad";
1854 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1855 sample_function->name = projected ? "texture2DProjGradARB" : "texture2DGradARB";
1856 else
1857 {
1858 FIXME("Unsupported 2D grad function.\n");
1859 sample_function->name = "unsupported2DGrad";
1860 }
1861 }
1862 else
1863 {
1864 sample_function->name = projected ? "texture2DProj" : "texture2D";
1865 }
1866 }
1867 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
1868 break;
1869 case WINED3DSTT_CUBE:
1870 if(lod) {
1871 sample_function->name = "textureCubeLod";
1872 }
1873 else if (grad)
1874 {
1875 if (gl_info->supported[EXT_GPU_SHADER4])
1876 sample_function->name = "textureCubeGrad";
1877 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1878 sample_function->name = "textureCubeGradARB";
1879 else
1880 {
1881 FIXME("Unsupported Cube grad function.\n");
1882 sample_function->name = "unsupportedCubeGrad";
1883 }
1884 }
1885 else
1886 {
1887 sample_function->name = "textureCube";
1888 }
1889 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1890 break;
1891 case WINED3DSTT_VOLUME:
1892 if(lod) {
1893 sample_function->name = projected ? "texture3DProjLod" : "texture3DLod";
1894 }
1895 else if (grad)
1896 {
1897 if (gl_info->supported[EXT_GPU_SHADER4])
1898 sample_function->name = projected ? "texture3DProjGrad" : "texture3DGrad";
1899 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1900 sample_function->name = projected ? "texture3DProjGradARB" : "texture3DGradARB";
1901 else
1902 {
1903 FIXME("Unsupported 3D grad function.\n");
1904 sample_function->name = "unsupported3DGrad";
1905 }
1906 }
1907 else
1908 {
1909 sample_function->name = projected ? "texture3DProj" : "texture3D";
1910 }
1911 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1912 break;
1913 default:
1914 sample_function->name = "";
1915 sample_function->coord_mask = 0;
1916 FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
1917 break;
1918 }
1919}
1920
1921static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
1922 BOOL sign_fixup, enum fixup_channel_source channel_source)
1923{
1924 switch(channel_source)
1925 {
1926 case CHANNEL_SOURCE_ZERO:
1927 strcat(arguments, "0.0");
1928 break;
1929
1930 case CHANNEL_SOURCE_ONE:
1931 strcat(arguments, "1.0");
1932 break;
1933
1934 case CHANNEL_SOURCE_X:
1935 strcat(arguments, reg_name);
1936 strcat(arguments, ".x");
1937 break;
1938
1939 case CHANNEL_SOURCE_Y:
1940 strcat(arguments, reg_name);
1941 strcat(arguments, ".y");
1942 break;
1943
1944 case CHANNEL_SOURCE_Z:
1945 strcat(arguments, reg_name);
1946 strcat(arguments, ".z");
1947 break;
1948
1949 case CHANNEL_SOURCE_W:
1950 strcat(arguments, reg_name);
1951 strcat(arguments, ".w");
1952 break;
1953
1954 default:
1955 FIXME("Unhandled channel source %#x\n", channel_source);
1956 strcat(arguments, "undefined");
1957 break;
1958 }
1959
1960 if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
1961}
1962
1963static void shader_glsl_color_correction(const struct wined3d_shader_instruction *ins, struct color_fixup_desc fixup)
1964{
1965 struct wined3d_shader_dst_param dst;
1966 unsigned int mask_size, remaining;
1967 glsl_dst_param_t dst_param;
1968 char arguments[256];
1969 DWORD mask;
1970
1971 mask = 0;
1972 if (fixup.x_sign_fixup || fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0;
1973 if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1;
1974 if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2;
1975 if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3;
1976 mask &= ins->dst[0].write_mask;
1977
1978 if (!mask) return; /* Nothing to do */
1979
1980 if (is_complex_fixup(fixup))
1981 {
1982 enum complex_fixup complex_fixup = get_complex_fixup(fixup);
1983 FIXME("Complex fixup (%#x) not supported\n",complex_fixup);
1984 return;
1985 }
1986
1987 mask_size = shader_glsl_get_write_mask_size(mask);
1988
1989 dst = ins->dst[0];
1990 dst.write_mask = mask;
1991 shader_glsl_add_dst_param(ins, &dst, &dst_param);
1992
1993 arguments[0] = '\0';
1994 remaining = mask_size;
1995 if (mask & WINED3DSP_WRITEMASK_0)
1996 {
1997 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.x_sign_fixup, fixup.x_source);
1998 if (--remaining) strcat(arguments, ", ");
1999 }
2000 if (mask & WINED3DSP_WRITEMASK_1)
2001 {
2002 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.y_sign_fixup, fixup.y_source);
2003 if (--remaining) strcat(arguments, ", ");
2004 }
2005 if (mask & WINED3DSP_WRITEMASK_2)
2006 {
2007 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.z_sign_fixup, fixup.z_source);
2008 if (--remaining) strcat(arguments, ", ");
2009 }
2010 if (mask & WINED3DSP_WRITEMASK_3)
2011 {
2012 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.w_sign_fixup, fixup.w_source);
2013 if (--remaining) strcat(arguments, ", ");
2014 }
2015
2016 if (mask_size > 1)
2017 {
2018 shader_addline(ins->ctx->buffer, "%s%s = vec%u(%s);\n",
2019 dst_param.reg_name, dst_param.mask_str, mask_size, arguments);
2020 }
2021 else
2022 {
2023 shader_addline(ins->ctx->buffer, "%s%s = %s;\n", dst_param.reg_name, dst_param.mask_str, arguments);
2024 }
2025}
2026
2027static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins,
2028 DWORD sampler, const glsl_sample_function_t *sample_function, DWORD swizzle,
2029 const char *dx, const char *dy,
2030 const char *bias, const char *coord_reg_fmt, ...)
2031{
2032 const char *sampler_base;
2033 char dst_swizzle[6];
2034 struct color_fixup_desc fixup;
2035 BOOL np2_fixup = FALSE;
2036 BOOL tmirror_tmp_reg = FALSE;
2037 va_list args;
2038
2039 shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
2040
2041 if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
2042 {
2043 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
2044 fixup = priv->cur_ps_args->color_fixup[sampler];
2045 sampler_base = "Psampler";
2046
2047 if (priv->cur_ps_args->np2_fixup & (1 << sampler)) {
2048 if(bias) {
2049 FIXME("Biased sampling from NP2 textures is unsupported\n");
2050 } else {
2051 np2_fixup = TRUE;
2052 }
2053 }
2054
2055 if (priv->cur_ps_args->t_mirror & (1 << sampler))
2056 {
2057 if (ins->ctx->reg_maps->sampler_type[sampler]==WINED3DSTT_2D)
2058 {
2059 if (sample_function->coord_mask & WINED3DSP_WRITEMASK_1)
2060 {
2061 glsl_src_param_t coord_param;
2062 shader_glsl_add_src_param(ins, &ins->src[0], sample_function->coord_mask, &coord_param);
2063
2064 if (ins->src[0].reg.type != WINED3DSPR_INPUT)
2065 {
2066 shader_addline(ins->ctx->buffer, "%s.y=1.0-%s.y;\n",
2067 coord_param.reg_name, coord_param.reg_name);
2068 }
2069 else
2070 {
2071 tmirror_tmp_reg = TRUE;
2072 shader_addline(ins->ctx->buffer, "tmp0.xy=vec2(%s.x, 1.0-%s.y).xy;\n",
2073 coord_param.reg_name, coord_param.reg_name);
2074 }
2075 }
2076 else
2077 {
2078 DebugBreak();
2079 FIXME("Unexpected coord_mask with t_mirror\n");
2080 }
2081 }
2082 }
2083 } else {
2084 sampler_base = "Vsampler";
2085 fixup = COLOR_FIXUP_IDENTITY; /* FIXME: Vshader color fixup */
2086 }
2087
2088 shader_glsl_append_dst(ins->ctx->buffer, ins);
2089
2090 shader_addline(ins->ctx->buffer, "%s(%s%u, ", sample_function->name, sampler_base, sampler);
2091
2092 if (tmirror_tmp_reg)
2093 {
2094 shader_addline(ins->ctx->buffer, "%s", "tmp0.xy");
2095 }
2096 else
2097 {
2098 va_start(args, coord_reg_fmt);
2099 shader_vaddline(ins->ctx->buffer, coord_reg_fmt, args);
2100 va_end(args);
2101 }
2102
2103 if(bias) {
2104 shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle);
2105 } else {
2106 if (np2_fixup) {
2107 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
2108 const unsigned char idx = priv->cur_np2fixup_info->idx[sampler];
2109
2110 shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup[%u].%s)%s);\n", idx >> 1,
2111 (idx % 2) ? "zw" : "xy", dst_swizzle);
2112 } else if(dx && dy) {
2113 shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle);
2114 } else {
2115 shader_addline(ins->ctx->buffer, ")%s);\n", dst_swizzle);
2116 }
2117 }
2118
2119 if(!is_identity_fixup(fixup)) {
2120 shader_glsl_color_correction(ins, fixup);
2121 }
2122}
2123
2124/*****************************************************************************
2125 * Begin processing individual instruction opcodes
2126 ****************************************************************************/
2127
2128/* Generate GLSL arithmetic functions (dst = src1 + src2) */
2129static void shader_glsl_arith(const struct wined3d_shader_instruction *ins)
2130{
2131 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2132 glsl_src_param_t src0_param;
2133 glsl_src_param_t src1_param;
2134 DWORD write_mask;
2135 char op;
2136
2137 /* Determine the GLSL operator to use based on the opcode */
2138 switch (ins->handler_idx)
2139 {
2140 case WINED3DSIH_MUL: op = '*'; break;
2141 case WINED3DSIH_ADD: op = '+'; break;
2142 case WINED3DSIH_SUB: op = '-'; break;
2143 default:
2144 op = ' ';
2145 FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
2146 break;
2147 }
2148
2149 write_mask = shader_glsl_append_dst(buffer, ins);
2150 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2151 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2152 shader_addline(buffer, "%s %c %s);\n", src0_param.param_str, op, src1_param.param_str);
2153}
2154
2155/* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
2156static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
2157{
2158 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
2159 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2160 glsl_src_param_t src0_param;
2161 DWORD write_mask;
2162
2163 write_mask = shader_glsl_append_dst(buffer, ins);
2164 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2165
2166 /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
2167 * shader versions WINED3DSIO_MOVA is used for this. */
2168 if (ins->ctx->reg_maps->shader_version.major == 1
2169 && !shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)
2170 && ins->dst[0].reg.type == WINED3DSPR_ADDR)
2171 {
2172 /* This is a simple floor() */
2173 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2174 if (mask_size > 1) {
2175 shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str);
2176 } else {
2177 shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str);
2178 }
2179 }
2180 else if(ins->handler_idx == WINED3DSIH_MOVA)
2181 {
2182 /* We need to *round* to the nearest int here. */
2183 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2184
2185 if (gl_info->supported[EXT_GPU_SHADER4])
2186 {
2187 if (mask_size > 1)
2188 shader_addline(buffer, "ivec%d(round(%s)));\n", mask_size, src0_param.param_str);
2189 else
2190 shader_addline(buffer, "int(round(%s)));\n", src0_param.param_str);
2191 }
2192 else
2193 {
2194 if (mask_size > 1)
2195 shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n",
2196 mask_size, src0_param.param_str, mask_size, src0_param.param_str);
2197 else
2198 shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n",
2199 src0_param.param_str, src0_param.param_str);
2200 }
2201 }
2202 else
2203 {
2204 shader_addline(buffer, "%s);\n", src0_param.param_str);
2205 }
2206}
2207
2208/* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */
2209static void shader_glsl_dot(const struct wined3d_shader_instruction *ins)
2210{
2211 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2212 glsl_src_param_t src0_param;
2213 glsl_src_param_t src1_param;
2214 DWORD dst_write_mask, src_write_mask;
2215 unsigned int dst_size = 0;
2216
2217 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2218 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2219
2220 /* dp3 works on vec3, dp4 on vec4 */
2221 if (ins->handler_idx == WINED3DSIH_DP4)
2222 {
2223 src_write_mask = WINED3DSP_WRITEMASK_ALL;
2224 } else {
2225 src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2226 }
2227
2228 shader_glsl_add_src_param(ins, &ins->src[0], src_write_mask, &src0_param);
2229 shader_glsl_add_src_param(ins, &ins->src[1], src_write_mask, &src1_param);
2230
2231 if (dst_size > 1) {
2232 shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
2233 } else {
2234 shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
2235 }
2236}
2237
2238/* Note that this instruction has some restrictions. The destination write mask
2239 * can't contain the w component, and the source swizzles have to be .xyzw */
2240static void shader_glsl_cross(const struct wined3d_shader_instruction *ins)
2241{
2242 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2243 glsl_src_param_t src0_param;
2244 glsl_src_param_t src1_param;
2245 char dst_mask[6];
2246
2247 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2248 shader_glsl_append_dst(ins->ctx->buffer, ins);
2249 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2250 shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
2251 shader_addline(ins->ctx->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
2252}
2253
2254/* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1)
2255 * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while
2256 * GLSL uses the value as-is. */
2257static void shader_glsl_pow(const struct wined3d_shader_instruction *ins)
2258{
2259 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2260 glsl_src_param_t src0_param;
2261 glsl_src_param_t src1_param;
2262 DWORD dst_write_mask;
2263 unsigned int dst_size;
2264
2265 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2266 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2267
2268 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2269 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
2270
2271 if (dst_size > 1) {
2272 shader_addline(buffer, "vec%d(pow(abs(%s), %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
2273 } else {
2274 shader_addline(buffer, "pow(abs(%s), %s));\n", src0_param.param_str, src1_param.param_str);
2275 }
2276}
2277
2278/* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|))
2279 * Src0 is a scalar. Note that D3D uses the absolute of src0, while
2280 * GLSL uses the value as-is. */
2281static void shader_glsl_log(const struct wined3d_shader_instruction *ins)
2282{
2283 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2284 glsl_src_param_t src0_param;
2285 DWORD dst_write_mask;
2286 unsigned int dst_size;
2287
2288 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2289 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2290
2291 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2292
2293 if (dst_size > 1)
2294 {
2295 shader_addline(buffer, "vec%d(%s == 0.0 ? -FLT_MAX : log2(abs(%s))));\n",
2296 dst_size, src0_param.param_str, src0_param.param_str);
2297 }
2298 else
2299 {
2300 shader_addline(buffer, "%s == 0.0 ? -FLT_MAX : log2(abs(%s)));\n",
2301 src0_param.param_str, src0_param.param_str);
2302 }
2303}
2304
2305/* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */
2306static void shader_glsl_map2gl(const struct wined3d_shader_instruction *ins)
2307{
2308 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2309 glsl_src_param_t src_param;
2310 const char *instruction;
2311 DWORD write_mask;
2312 unsigned i;
2313
2314 /* Determine the GLSL function to use based on the opcode */
2315 /* TODO: Possibly make this a table for faster lookups */
2316 switch (ins->handler_idx)
2317 {
2318 case WINED3DSIH_MIN: instruction = "min"; break;
2319 case WINED3DSIH_MAX: instruction = "max"; break;
2320 case WINED3DSIH_ABS: instruction = "abs"; break;
2321 case WINED3DSIH_FRC: instruction = "fract"; break;
2322 case WINED3DSIH_EXP: instruction = "exp2"; break;
2323 case WINED3DSIH_DSX: instruction = "dFdx"; break;
2324 case WINED3DSIH_DSY: instruction = "ycorrection.y * dFdy"; break;
2325 default: instruction = "";
2326 FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
2327 break;
2328 }
2329
2330 write_mask = shader_glsl_append_dst(buffer, ins);
2331
2332 shader_addline(buffer, "%s(", instruction);
2333
2334 if (ins->src_count)
2335 {
2336 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2337 shader_addline(buffer, "%s", src_param.param_str);
2338 for (i = 1; i < ins->src_count; ++i)
2339 {
2340 shader_glsl_add_src_param(ins, &ins->src[i], write_mask, &src_param);
2341 shader_addline(buffer, ", %s", src_param.param_str);
2342 }
2343 }
2344
2345 shader_addline(buffer, "));\n");
2346}
2347
2348static void shader_glsl_nrm(const struct wined3d_shader_instruction *ins)
2349{
2350 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2351 glsl_src_param_t src_param;
2352 unsigned int mask_size;
2353 DWORD write_mask;
2354 char dst_mask[6];
2355
2356 write_mask = shader_glsl_get_write_mask(ins->dst, dst_mask);
2357 mask_size = shader_glsl_get_write_mask_size(write_mask);
2358 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2359
2360 shader_addline(buffer, "tmp0.x = length(%s);\n", src_param.param_str);
2361 shader_glsl_append_dst(buffer, ins);
2362 if (mask_size > 1)
2363 {
2364 shader_addline(buffer, "tmp0.x == 0.0 ? vec%u(0.0) : (%s / tmp0.x));\n",
2365 mask_size, src_param.param_str);
2366 }
2367 else
2368 {
2369 shader_addline(buffer, "tmp0.x == 0.0 ? 0.0 : (%s / tmp0.x));\n",
2370 src_param.param_str);
2371 }
2372}
2373
2374/** Process the WINED3DSIO_EXPP instruction in GLSL:
2375 * For shader model 1.x, do the following (and honor the writemask, so use a temporary variable):
2376 * dst.x = 2^(floor(src))
2377 * dst.y = src - floor(src)
2378 * dst.z = 2^src (partial precision is allowed, but optional)
2379 * dst.w = 1.0;
2380 * For 2.0 shaders, just do this (honoring writemask and swizzle):
2381 * dst = 2^src; (partial precision is allowed, but optional)
2382 */
2383static void shader_glsl_expp(const struct wined3d_shader_instruction *ins)
2384{
2385 glsl_src_param_t src_param;
2386
2387 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src_param);
2388
2389 if (ins->ctx->reg_maps->shader_version.major < 2)
2390 {
2391 char dst_mask[6];
2392
2393 shader_addline(ins->ctx->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str);
2394 shader_addline(ins->ctx->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str);
2395 shader_addline(ins->ctx->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str);
2396 shader_addline(ins->ctx->buffer, "tmp0.w = 1.0;\n");
2397
2398 shader_glsl_append_dst(ins->ctx->buffer, ins);
2399 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2400 shader_addline(ins->ctx->buffer, "tmp0%s);\n", dst_mask);
2401 } else {
2402 DWORD write_mask;
2403 unsigned int mask_size;
2404
2405 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2406 mask_size = shader_glsl_get_write_mask_size(write_mask);
2407
2408 if (mask_size > 1) {
2409 shader_addline(ins->ctx->buffer, "vec%d(exp2(%s)));\n", mask_size, src_param.param_str);
2410 } else {
2411 shader_addline(ins->ctx->buffer, "exp2(%s));\n", src_param.param_str);
2412 }
2413 }
2414}
2415
2416/** Process the RCP (reciprocal or inverse) opcode in GLSL (dst = 1 / src) */
2417static void shader_glsl_rcp(const struct wined3d_shader_instruction *ins)
2418{
2419 glsl_src_param_t src_param;
2420 DWORD write_mask;
2421 unsigned int mask_size;
2422
2423 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2424 mask_size = shader_glsl_get_write_mask_size(write_mask);
2425 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2426
2427 if (mask_size > 1)
2428 {
2429 shader_addline(ins->ctx->buffer, "vec%d(%s == 0.0 ? FLT_MAX : 1.0 / %s));\n",
2430 mask_size, src_param.param_str, src_param.param_str);
2431 }
2432 else
2433 {
2434 shader_addline(ins->ctx->buffer, "%s == 0.0 ? FLT_MAX : 1.0 / %s);\n",
2435 src_param.param_str, src_param.param_str);
2436 }
2437}
2438
2439static void shader_glsl_rsq(const struct wined3d_shader_instruction *ins)
2440{
2441 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2442 glsl_src_param_t src_param;
2443 DWORD write_mask;
2444 unsigned int mask_size;
2445
2446 write_mask = shader_glsl_append_dst(buffer, ins);
2447 mask_size = shader_glsl_get_write_mask_size(write_mask);
2448
2449 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2450
2451 if (mask_size > 1)
2452 {
2453 shader_addline(buffer, "vec%d(%s == 0.0 ? FLT_MAX : inversesqrt(abs(%s))));\n",
2454 mask_size, src_param.param_str, src_param.param_str);
2455 }
2456 else
2457 {
2458 shader_addline(buffer, "%s == 0.0 ? FLT_MAX : inversesqrt(abs(%s)));\n",
2459 src_param.param_str, src_param.param_str);
2460 }
2461}
2462
2463/** Process signed comparison opcodes in GLSL. */
2464static void shader_glsl_compare(const struct wined3d_shader_instruction *ins)
2465{
2466 glsl_src_param_t src0_param;
2467 glsl_src_param_t src1_param;
2468 DWORD write_mask;
2469 unsigned int mask_size;
2470
2471 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2472 mask_size = shader_glsl_get_write_mask_size(write_mask);
2473 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2474 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2475
2476 if (mask_size > 1) {
2477 const char *compare;
2478
2479 switch(ins->handler_idx)
2480 {
2481 case WINED3DSIH_SLT: compare = "lessThan"; break;
2482 case WINED3DSIH_SGE: compare = "greaterThanEqual"; break;
2483 default: compare = "";
2484 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2485 }
2486
2487 shader_addline(ins->ctx->buffer, "vec%d(%s(%s, %s)));\n", mask_size, compare,
2488 src0_param.param_str, src1_param.param_str);
2489 } else {
2490 switch(ins->handler_idx)
2491 {
2492 case WINED3DSIH_SLT:
2493 /* Step(src0, src1) is not suitable here because if src0 == src1 SLT is supposed,
2494 * to return 0.0 but step returns 1.0 because step is not < x
2495 * An alternative is a bvec compare padded with an unused second component.
2496 * step(src1 * -1.0, src0 * -1.0) is not an option because it suffers from the same
2497 * issue. Playing with not() is not possible either because not() does not accept
2498 * a scalar.
2499 */
2500 shader_addline(ins->ctx->buffer, "(%s < %s) ? 1.0 : 0.0);\n",
2501 src0_param.param_str, src1_param.param_str);
2502 break;
2503 case WINED3DSIH_SGE:
2504 /* Here we can use the step() function and safe a conditional */
2505 shader_addline(ins->ctx->buffer, "step(%s, %s));\n", src1_param.param_str, src0_param.param_str);
2506 break;
2507 default:
2508 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2509 }
2510
2511 }
2512}
2513
2514/** Process CMP instruction in GLSL (dst = src0 >= 0.0 ? src1 : src2), per channel */
2515static void shader_glsl_cmp(const struct wined3d_shader_instruction *ins)
2516{
2517 glsl_src_param_t src0_param;
2518 glsl_src_param_t src1_param;
2519 glsl_src_param_t src2_param;
2520 DWORD write_mask, cmp_channel = 0;
2521 unsigned int i, j;
2522 char mask_char[6];
2523 BOOL temp_destination = FALSE;
2524
2525 if (shader_is_scalar(&ins->src[0].reg))
2526 {
2527 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2528
2529 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
2530 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2531 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2532
2533 shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
2534 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2535 } else {
2536 DWORD dst_mask = ins->dst[0].write_mask;
2537 struct wined3d_shader_dst_param dst = ins->dst[0];
2538
2539 /* Cycle through all source0 channels */
2540 for (i=0; i<4; i++) {
2541 write_mask = 0;
2542 /* Find the destination channels which use the current source0 channel */
2543 for (j=0; j<4; j++) {
2544 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
2545 {
2546 write_mask |= WINED3DSP_WRITEMASK_0 << j;
2547 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
2548 }
2549 }
2550 dst.write_mask = dst_mask & write_mask;
2551
2552 /* Splitting the cmp instruction up in multiple lines imposes a problem:
2553 * The first lines may overwrite source parameters of the following lines.
2554 * Deal with that by using a temporary destination register if needed
2555 */
2556 if ((ins->src[0].reg.idx == ins->dst[0].reg.idx
2557 && ins->src[0].reg.type == ins->dst[0].reg.type)
2558 || (ins->src[1].reg.idx == ins->dst[0].reg.idx
2559 && ins->src[1].reg.type == ins->dst[0].reg.type)
2560 || (ins->src[2].reg.idx == ins->dst[0].reg.idx
2561 && ins->src[2].reg.type == ins->dst[0].reg.type))
2562 {
2563 write_mask = shader_glsl_get_write_mask(&dst, mask_char);
2564 if (!write_mask) continue;
2565 shader_addline(ins->ctx->buffer, "tmp0%s = (", mask_char);
2566 temp_destination = TRUE;
2567 } else {
2568 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
2569 if (!write_mask) continue;
2570 }
2571
2572 shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
2573 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2574 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2575
2576 shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
2577 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2578 }
2579
2580 if(temp_destination) {
2581 shader_glsl_get_write_mask(&ins->dst[0], mask_char);
2582 shader_glsl_append_dst(ins->ctx->buffer, ins);
2583 shader_addline(ins->ctx->buffer, "tmp0%s);\n", mask_char);
2584 }
2585 }
2586
2587}
2588
2589/** Process the CND opcode in GLSL (dst = (src0 > 0.5) ? src1 : src2) */
2590/* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
2591 * the compare is done per component of src0. */
2592static void shader_glsl_cnd(const struct wined3d_shader_instruction *ins)
2593{
2594 struct wined3d_shader_dst_param dst;
2595 glsl_src_param_t src0_param;
2596 glsl_src_param_t src1_param;
2597 glsl_src_param_t src2_param;
2598 DWORD write_mask, cmp_channel = 0;
2599 unsigned int i, j;
2600 DWORD dst_mask;
2601 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
2602 ins->ctx->reg_maps->shader_version.minor);
2603
2604 if (shader_version < WINED3D_SHADER_VERSION(1, 4))
2605 {
2606 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2607 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2608 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2609 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2610
2611 /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */
2612 if (ins->coissue)
2613 {
2614 shader_addline(ins->ctx->buffer, "%s /* COISSUE! */);\n", src1_param.param_str);
2615 } else {
2616 shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2617 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2618 }
2619 return;
2620 }
2621 /* Cycle through all source0 channels */
2622 dst_mask = ins->dst[0].write_mask;
2623 dst = ins->dst[0];
2624 for (i=0; i<4; i++) {
2625 write_mask = 0;
2626 /* Find the destination channels which use the current source0 channel */
2627 for (j=0; j<4; j++) {
2628 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
2629 {
2630 write_mask |= WINED3DSP_WRITEMASK_0 << j;
2631 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
2632 }
2633 }
2634
2635 dst.write_mask = dst_mask & write_mask;
2636 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
2637 if (!write_mask) continue;
2638
2639 shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
2640 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2641 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2642
2643 shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2644 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2645 }
2646}
2647
2648/** GLSL code generation for WINED3DSIO_MAD: Multiply the first 2 opcodes, then add the last */
2649static void shader_glsl_mad(const struct wined3d_shader_instruction *ins)
2650{
2651 glsl_src_param_t src0_param;
2652 glsl_src_param_t src1_param;
2653 glsl_src_param_t src2_param;
2654 DWORD write_mask;
2655
2656 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2657 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2658 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2659 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2660 shader_addline(ins->ctx->buffer, "(%s * %s) + %s);\n",
2661 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2662}
2663
2664/* Handles transforming all WINED3DSIO_M?x? opcodes for
2665 Vertex shaders to GLSL codes */
2666static void shader_glsl_mnxn(const struct wined3d_shader_instruction *ins)
2667{
2668 int i;
2669 int nComponents = 0;
2670 struct wined3d_shader_dst_param tmp_dst = {{0}};
2671 struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
2672 struct wined3d_shader_instruction tmp_ins;
2673
2674 memset(&tmp_ins, 0, sizeof(tmp_ins));
2675
2676 /* Set constants for the temporary argument */
2677 tmp_ins.ctx = ins->ctx;
2678 tmp_ins.dst_count = 1;
2679 tmp_ins.dst = &tmp_dst;
2680 tmp_ins.src_count = 2;
2681 tmp_ins.src = tmp_src;
2682
2683 switch(ins->handler_idx)
2684 {
2685 case WINED3DSIH_M4x4:
2686 nComponents = 4;
2687 tmp_ins.handler_idx = WINED3DSIH_DP4;
2688 break;
2689 case WINED3DSIH_M4x3:
2690 nComponents = 3;
2691 tmp_ins.handler_idx = WINED3DSIH_DP4;
2692 break;
2693 case WINED3DSIH_M3x4:
2694 nComponents = 4;
2695 tmp_ins.handler_idx = WINED3DSIH_DP3;
2696 break;
2697 case WINED3DSIH_M3x3:
2698 nComponents = 3;
2699 tmp_ins.handler_idx = WINED3DSIH_DP3;
2700 break;
2701 case WINED3DSIH_M3x2:
2702 nComponents = 2;
2703 tmp_ins.handler_idx = WINED3DSIH_DP3;
2704 break;
2705 default:
2706 break;
2707 }
2708
2709 tmp_dst = ins->dst[0];
2710 tmp_src[0] = ins->src[0];
2711 tmp_src[1] = ins->src[1];
2712 for (i = 0; i < nComponents; ++i)
2713 {
2714 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
2715 shader_glsl_dot(&tmp_ins);
2716 ++tmp_src[1].reg.idx;
2717 }
2718}
2719
2720/**
2721 The LRP instruction performs a component-wise linear interpolation
2722 between the second and third operands using the first operand as the
2723 blend factor. Equation: (dst = src2 + src0 * (src1 - src2))
2724 This is equivalent to mix(src2, src1, src0);
2725*/
2726static void shader_glsl_lrp(const struct wined3d_shader_instruction *ins)
2727{
2728 glsl_src_param_t src0_param;
2729 glsl_src_param_t src1_param;
2730 glsl_src_param_t src2_param;
2731 DWORD write_mask;
2732
2733 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2734
2735 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2736 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2737 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2738
2739 shader_addline(ins->ctx->buffer, "mix(%s, %s, %s));\n",
2740 src2_param.param_str, src1_param.param_str, src0_param.param_str);
2741}
2742
2743/** Process the WINED3DSIO_LIT instruction in GLSL:
2744 * dst.x = dst.w = 1.0
2745 * dst.y = (src0.x > 0) ? src0.x
2746 * dst.z = (src0.x > 0) ? ((src0.y > 0) ? pow(src0.y, src.w) : 0) : 0
2747 * where src.w is clamped at +- 128
2748 */
2749static void shader_glsl_lit(const struct wined3d_shader_instruction *ins)
2750{
2751 glsl_src_param_t src0_param;
2752 glsl_src_param_t src1_param;
2753 glsl_src_param_t src3_param;
2754 char dst_mask[6];
2755
2756 shader_glsl_append_dst(ins->ctx->buffer, ins);
2757 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2758
2759 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2760 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src1_param);
2761 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src3_param);
2762
2763 /* The sdk specifies the instruction like this
2764 * dst.x = 1.0;
2765 * if(src.x > 0.0) dst.y = src.x
2766 * else dst.y = 0.0.
2767 * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power);
2768 * else dst.z = 0.0;
2769 * dst.w = 1.0;
2770 *
2771 * Obviously that has quite a few conditionals in it which we don't like. So the first step is this:
2772 * dst.x = 1.0 ... No further explanation needed
2773 * dst.y = max(src.y, 0.0); ... If x < 0.0, use 0.0, otherwise x. Same as the conditional
2774 * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0; ... 0 ^ power is 0, and otherwise we use y anyway
2775 * dst.w = 1.0. ... Nothing fancy.
2776 *
2777 * So we still have one conditional in there. So do this:
2778 * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power);
2779 *
2780 * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power),
2781 * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too.
2782 * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to
2783 */
2784 shader_addline(ins->ctx->buffer,
2785 "vec4(1.0, max(%s, 0.0), pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n",
2786 src0_param.param_str, src1_param.param_str, src0_param.param_str, src3_param.param_str, dst_mask);
2787}
2788
2789/** Process the WINED3DSIO_DST instruction in GLSL:
2790 * dst.x = 1.0
2791 * dst.y = src0.x * src0.y
2792 * dst.z = src0.z
2793 * dst.w = src1.w
2794 */
2795static void shader_glsl_dst(const struct wined3d_shader_instruction *ins)
2796{
2797 glsl_src_param_t src0y_param;
2798 glsl_src_param_t src0z_param;
2799 glsl_src_param_t src1y_param;
2800 glsl_src_param_t src1w_param;
2801 char dst_mask[6];
2802
2803 shader_glsl_append_dst(ins->ctx->buffer, ins);
2804 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2805
2806 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src0y_param);
2807 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &src0z_param);
2808 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_1, &src1y_param);
2809 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_3, &src1w_param);
2810
2811 shader_addline(ins->ctx->buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n",
2812 src0y_param.param_str, src1y_param.param_str, src0z_param.param_str, src1w_param.param_str, dst_mask);
2813}
2814
2815/** Process the WINED3DSIO_SINCOS instruction in GLSL:
2816 * VS 2.0 requires that specific cosine and sine constants be passed to this instruction so the hardware
2817 * can handle it. But, these functions are built-in for GLSL, so we can just ignore the last 2 params.
2818 *
2819 * dst.x = cos(src0.?)
2820 * dst.y = sin(src0.?)
2821 * dst.z = dst.z
2822 * dst.w = dst.w
2823 */
2824static void shader_glsl_sincos(const struct wined3d_shader_instruction *ins)
2825{
2826 glsl_src_param_t src0_param;
2827 DWORD write_mask;
2828
2829 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2830 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2831
2832 switch (write_mask) {
2833 case WINED3DSP_WRITEMASK_0:
2834 shader_addline(ins->ctx->buffer, "cos(%s));\n", src0_param.param_str);
2835 break;
2836
2837 case WINED3DSP_WRITEMASK_1:
2838 shader_addline(ins->ctx->buffer, "sin(%s));\n", src0_param.param_str);
2839 break;
2840
2841 case (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1):
2842 shader_addline(ins->ctx->buffer, "vec2(cos(%s), sin(%s)));\n", src0_param.param_str, src0_param.param_str);
2843 break;
2844
2845 default:
2846 ERR("Write mask should be .x, .y or .xy\n");
2847 break;
2848 }
2849}
2850
2851/* sgn in vs_2_0 has 2 extra parameters(registers for temporary storage) which we don't use
2852 * here. But those extra parameters require a dedicated function for sgn, since map2gl would
2853 * generate invalid code
2854 */
2855static void shader_glsl_sgn(const struct wined3d_shader_instruction *ins)
2856{
2857 glsl_src_param_t src0_param;
2858 DWORD write_mask;
2859
2860 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2861 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2862
2863 shader_addline(ins->ctx->buffer, "sign(%s));\n", src0_param.param_str);
2864}
2865
2866/** Process the WINED3DSIO_LOOP instruction in GLSL:
2867 * Start a for() loop where src1.y is the initial value of aL,
2868 * increment aL by src1.z for a total of src1.x iterations.
2869 * Need to use a temporary variable for this operation.
2870 */
2871/* FIXME: I don't think nested loops will work correctly this way. */
2872static void shader_glsl_loop(const struct wined3d_shader_instruction *ins)
2873{
2874 glsl_src_param_t src1_param;
2875 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2876 const DWORD *control_values = NULL;
2877 const local_constant *constant;
2878
2879 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
2880
2881 /* Try to hardcode the loop control parameters if possible. Direct3D 9 class hardware doesn't support real
2882 * varying indexing, but Microsoft designed this feature for Shader model 2.x+. If the loop control is
2883 * known at compile time, the GLSL compiler can unroll the loop, and replace indirect addressing with direct
2884 * addressing.
2885 */
2886 if (ins->src[1].reg.type == WINED3DSPR_CONSTINT)
2887 {
2888 LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry) {
2889 if (constant->idx == ins->src[1].reg.idx)
2890 {
2891 control_values = constant->value;
2892 break;
2893 }
2894 }
2895 }
2896
2897 if (control_values)
2898 {
2899 struct wined3d_shader_loop_control loop_control;
2900 loop_control.count = control_values[0];
2901 loop_control.start = control_values[1];
2902 loop_control.step = (int)control_values[2];
2903
2904 if (loop_control.step > 0)
2905 {
2906 shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u < (%u * %d + %u); aL%u += %d) {\n",
2907 shader->baseShader.cur_loop_depth, loop_control.start,
2908 shader->baseShader.cur_loop_depth, loop_control.count, loop_control.step, loop_control.start,
2909 shader->baseShader.cur_loop_depth, loop_control.step);
2910 }
2911 else if (loop_control.step < 0)
2912 {
2913 shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u > (%u * %d + %u); aL%u += %d) {\n",
2914 shader->baseShader.cur_loop_depth, loop_control.start,
2915 shader->baseShader.cur_loop_depth, loop_control.count, loop_control.step, loop_control.start,
2916 shader->baseShader.cur_loop_depth, loop_control.step);
2917 }
2918 else
2919 {
2920 shader_addline(ins->ctx->buffer, "for (aL%u = %u, tmpInt%u = 0; tmpInt%u < %u; tmpInt%u++) {\n",
2921 shader->baseShader.cur_loop_depth, loop_control.start, shader->baseShader.cur_loop_depth,
2922 shader->baseShader.cur_loop_depth, loop_control.count,
2923 shader->baseShader.cur_loop_depth);
2924 }
2925 } else {
2926 shader_addline(ins->ctx->buffer,
2927 "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z) {\n",
2928 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno,
2929 src1_param.reg_name, shader->baseShader.cur_loop_depth, src1_param.reg_name,
2930 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno, src1_param.reg_name);
2931 }
2932
2933 shader->baseShader.cur_loop_depth++;
2934 shader->baseShader.cur_loop_regno++;
2935}
2936
2937static void shader_glsl_end(const struct wined3d_shader_instruction *ins)
2938{
2939 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2940
2941 shader_addline(ins->ctx->buffer, "}\n");
2942
2943 if (ins->handler_idx == WINED3DSIH_ENDLOOP)
2944 {
2945 shader->baseShader.cur_loop_depth--;
2946 shader->baseShader.cur_loop_regno--;
2947 }
2948
2949 if (ins->handler_idx == WINED3DSIH_ENDREP)
2950 {
2951 shader->baseShader.cur_loop_depth--;
2952 }
2953}
2954
2955static void shader_glsl_rep(const struct wined3d_shader_instruction *ins)
2956{
2957 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2958 glsl_src_param_t src0_param;
2959 const DWORD *control_values = NULL;
2960 const local_constant *constant;
2961
2962 /* Try to hardcode local values to help the GLSL compiler to unroll and optimize the loop */
2963 if (ins->src[0].reg.type == WINED3DSPR_CONSTINT)
2964 {
2965 LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry)
2966 {
2967 if (constant->idx == ins->src[0].reg.idx)
2968 {
2969 control_values = constant->value;
2970 break;
2971 }
2972 }
2973 }
2974
2975 if(control_values) {
2976 shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %d; tmpInt%d++) {\n",
2977 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
2978 control_values[0], shader->baseShader.cur_loop_depth);
2979 } else {
2980 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2981 shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n",
2982 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
2983 src0_param.param_str, shader->baseShader.cur_loop_depth);
2984 }
2985 shader->baseShader.cur_loop_depth++;
2986}
2987
2988static void shader_glsl_if(const struct wined3d_shader_instruction *ins)
2989{
2990 glsl_src_param_t src0_param;
2991
2992 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2993 shader_addline(ins->ctx->buffer, "if (%s) {\n", src0_param.param_str);
2994}
2995
2996static void shader_glsl_ifc(const struct wined3d_shader_instruction *ins)
2997{
2998 glsl_src_param_t src0_param;
2999 glsl_src_param_t src1_param;
3000
3001 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3002 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3003
3004 shader_addline(ins->ctx->buffer, "if (%s %s %s) {\n",
3005 src0_param.param_str, shader_get_comp_op(ins->flags), src1_param.param_str);
3006}
3007
3008static void shader_glsl_else(const struct wined3d_shader_instruction *ins)
3009{
3010 shader_addline(ins->ctx->buffer, "} else {\n");
3011}
3012
3013static void shader_glsl_break(const struct wined3d_shader_instruction *ins)
3014{
3015 shader_addline(ins->ctx->buffer, "break;\n");
3016}
3017
3018/* FIXME: According to MSDN the compare is done per component. */
3019static void shader_glsl_breakc(const struct wined3d_shader_instruction *ins)
3020{
3021 glsl_src_param_t src0_param;
3022 glsl_src_param_t src1_param;
3023
3024 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3025 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3026
3027 shader_addline(ins->ctx->buffer, "if (%s %s %s) break;\n",
3028 src0_param.param_str, shader_get_comp_op(ins->flags), src1_param.param_str);
3029}
3030
3031static void shader_glsl_label(const struct wined3d_shader_instruction *ins)
3032{
3033 shader_addline(ins->ctx->buffer, "}\n");
3034 shader_addline(ins->ctx->buffer, "void subroutine%u () {\n", ins->src[0].reg.idx);
3035}
3036
3037static void shader_glsl_call(const struct wined3d_shader_instruction *ins)
3038{
3039 shader_addline(ins->ctx->buffer, "subroutine%u();\n", ins->src[0].reg.idx);
3040}
3041
3042static void shader_glsl_callnz(const struct wined3d_shader_instruction *ins)
3043{
3044 glsl_src_param_t src1_param;
3045
3046 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3047 shader_addline(ins->ctx->buffer, "if (%s) subroutine%u();\n", src1_param.param_str, ins->src[0].reg.idx);
3048}
3049
/* Process the RET instruction in GLSL: intentionally emits nothing.
 *
 * The closing '}' of a function is written when the next function is started
 * and at the end of the shader, so this handler exists only to suppress the
 * unhandled instruction warning. */
static void shader_glsl_ret(const struct wined3d_shader_instruction *ins)
{
    (void)ins;
}
3056
3057/*********************************************
3058 * Pixel Shader Specific Code begins here
3059 ********************************************/
3060static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
3061{
3062 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3063 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device;
3064 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
3065 ins->ctx->reg_maps->shader_version.minor);
3066 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3067 glsl_sample_function_t sample_function;
3068 DWORD sample_flags = 0;
3069 WINED3DSAMPLER_TEXTURE_TYPE sampler_type;
3070 DWORD sampler_idx;
3071 DWORD mask = 0, swizzle;
3072
3073 /* 1.0-1.4: Use destination register as sampler source.
3074 * 2.0+: Use provided sampler source. */
3075 if (shader_version < WINED3D_SHADER_VERSION(2,0)) sampler_idx = ins->dst[0].reg.idx;
3076 else sampler_idx = ins->src[1].reg.idx;
3077 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3078
3079 if (shader_version < WINED3D_SHADER_VERSION(1,4))
3080 {
3081 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3082 DWORD flags = (priv->cur_ps_args->tex_transform >> (sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT))
3083 & WINED3D_PSARGS_TEXTRANSFORM_MASK;
3084
3085 /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. */
3086 if (flags & WINED3D_PSARGS_PROJECTED && sampler_type != WINED3DSTT_CUBE) {
3087 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3088 switch (flags & ~WINED3D_PSARGS_PROJECTED) {
3089 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
3090 case WINED3DTTFF_COUNT2: mask = WINED3DSP_WRITEMASK_1; break;
3091 case WINED3DTTFF_COUNT3: mask = WINED3DSP_WRITEMASK_2; break;
3092 case WINED3DTTFF_COUNT4:
3093 case WINED3DTTFF_DISABLE: mask = WINED3DSP_WRITEMASK_3; break;
3094 }
3095 }
3096 }
3097 else if (shader_version < WINED3D_SHADER_VERSION(2,0))
3098 {
3099 DWORD src_mod = ins->src[0].modifiers;
3100
3101 if (src_mod == WINED3DSPSM_DZ) {
3102 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3103 mask = WINED3DSP_WRITEMASK_2;
3104 } else if (src_mod == WINED3DSPSM_DW) {
3105 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3106 mask = WINED3DSP_WRITEMASK_3;
3107 }
3108 } else {
3109 if (ins->flags & WINED3DSI_TEXLD_PROJECT)
3110 {
3111 /* ps 2.0 texldp instruction always divides by the fourth component. */
3112 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3113 mask = WINED3DSP_WRITEMASK_3;
3114 }
3115 }
3116
3117 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3118 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3119 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3120 }
3121
3122 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3123 mask |= sample_function.coord_mask;
3124
3125 if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE;
3126 else swizzle = ins->src[1].swizzle;
3127
3128 /* 1.0-1.3: Use destination register as coordinate source.
3129 1.4+: Use provided coordinate source register. */
3130 if (shader_version < WINED3D_SHADER_VERSION(1,4))
3131 {
3132 char coord_mask[6];
3133 shader_glsl_write_mask_to_str(mask, coord_mask);
3134 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
3135 "T%u%s", sampler_idx, coord_mask);
3136 } else {
3137 glsl_src_param_t coord_param;
3138 shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param);
3139 if (ins->flags & WINED3DSI_TEXLD_BIAS)
3140 {
3141 glsl_src_param_t bias;
3142 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias);
3143 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, bias.param_str,
3144 "%s", coord_param.param_str);
3145 } else {
3146 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
3147 "%s", coord_param.param_str);
3148 }
3149 }
3150}
3151
3152static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins)
3153{
3154 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3155 IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
3156 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3157 glsl_sample_function_t sample_function;
3158 glsl_src_param_t coord_param, dx_param, dy_param;
3159 DWORD sample_flags = WINED3D_GLSL_SAMPLE_GRAD;
3160 DWORD sampler_type;
3161 DWORD sampler_idx;
3162 DWORD swizzle = ins->src[1].swizzle;
3163
3164 if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4])
3165 {
3166 FIXME("texldd used, but not supported by hardware. Falling back to regular tex\n");
3167 shader_glsl_tex(ins);
3168 return;
3169 }
3170
3171 sampler_idx = ins->src[1].reg.idx;
3172 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3173 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3174 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3175 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3176 }
3177
3178 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3179 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
3180 shader_glsl_add_src_param(ins, &ins->src[2], sample_function.coord_mask, &dx_param);
3181 shader_glsl_add_src_param(ins, &ins->src[3], sample_function.coord_mask, &dy_param);
3182
3183 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, NULL,
3184 "%s", coord_param.param_str);
3185}
3186
3187static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins)
3188{
3189 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3190 IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
3191 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3192 glsl_sample_function_t sample_function;
3193 glsl_src_param_t coord_param, lod_param;
3194 DWORD sample_flags = WINED3D_GLSL_SAMPLE_LOD;
3195 DWORD sampler_type;
3196 DWORD sampler_idx;
3197 DWORD swizzle = ins->src[1].swizzle;
3198
3199 sampler_idx = ins->src[1].reg.idx;
3200 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3201 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3202 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3203 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3204 }
3205 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3206 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
3207
3208 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param);
3209
3210 if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4]
3211 && shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
3212 {
3213 /* The GLSL spec claims the Lod sampling functions are only supported in vertex shaders.
3214 * However, they seem to work just fine in fragment shaders as well. */
3215 WARN("Using %s in fragment shader.\n", sample_function.name);
3216 }
3217 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str,
3218 "%s", coord_param.param_str);
3219}
3220
3221static void shader_glsl_texcoord(const struct wined3d_shader_instruction *ins)
3222{
3223 /* FIXME: Make this work for more than just 2D textures */
3224 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3225 DWORD write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3226
3227 if (!(ins->ctx->reg_maps->shader_version.major == 1 && ins->ctx->reg_maps->shader_version.minor == 4))
3228 {
3229 char dst_mask[6];
3230
3231 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3232 shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n",
3233 ins->dst[0].reg.idx, dst_mask);
3234 } else {
3235 DWORD reg = ins->src[0].reg.idx;
3236 DWORD src_mod = ins->src[0].modifiers;
3237 char dst_swizzle[6];
3238
3239 shader_glsl_get_swizzle(&ins->src[0], FALSE, write_mask, dst_swizzle);
3240
3241 if (src_mod == WINED3DSPSM_DZ) {
3242 glsl_src_param_t div_param;
3243 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3244 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &div_param);
3245
3246 if (mask_size > 1) {
3247 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
3248 } else {
3249 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
3250 }
3251 } else if (src_mod == WINED3DSPSM_DW) {
3252 glsl_src_param_t div_param;
3253 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3254 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &div_param);
3255
3256 if (mask_size > 1) {
3257 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
3258 } else {
3259 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
3260 }
3261 } else {
3262 shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
3263 }
3264 }
3265}
3266
3267/** Process the WINED3DSIO_TEXDP3TEX instruction in GLSL:
3268 * Take a 3-component dot product of the TexCoord[dstreg] and src,
3269 * then perform a 1D texture lookup from stage dstregnum, place into dst. */
3270static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins)
3271{
3272 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3273 glsl_src_param_t src0_param;
3274 glsl_sample_function_t sample_function;
3275 DWORD sampler_idx = ins->dst[0].reg.idx;
3276 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3277 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3278 UINT mask_size;
3279
3280 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3281
3282 /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one
3283 * scalar, and projected sampling would require 4.
3284 *
3285 * It is a dependent read - not valid with conditional NP2 textures
3286 */
3287 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3288 mask_size = shader_glsl_get_write_mask_size(sample_function.coord_mask);
3289
3290 switch(mask_size)
3291 {
3292 case 1:
3293 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3294 "dot(gl_TexCoord[%u].xyz, %s)", sampler_idx, src0_param.param_str);
3295 break;
3296
3297 case 2:
3298 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3299 "vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str);
3300 break;
3301
3302 case 3:
3303 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3304 "vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str);
3305 break;
3306
3307 default:
3308 FIXME("Unexpected mask size %u\n", mask_size);
3309 break;
3310 }
3311}
3312
3313/** Process the WINED3DSIO_TEXDP3 instruction in GLSL:
3314 * Take a 3-component dot product of the TexCoord[dstreg] and src. */
3315static void shader_glsl_texdp3(const struct wined3d_shader_instruction *ins)
3316{
3317 glsl_src_param_t src0_param;
3318 DWORD dstreg = ins->dst[0].reg.idx;
3319 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3320 DWORD dst_mask;
3321 unsigned int mask_size;
3322
3323 dst_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3324 mask_size = shader_glsl_get_write_mask_size(dst_mask);
3325 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3326
3327 if (mask_size > 1) {
3328 shader_addline(ins->ctx->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
3329 } else {
3330 shader_addline(ins->ctx->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
3331 }
3332}
3333
3334/** Process the WINED3DSIO_TEXDEPTH instruction in GLSL:
3335 * Calculate the depth as dst.x / dst.y */
3336static void shader_glsl_texdepth(const struct wined3d_shader_instruction *ins)
3337{
3338 glsl_dst_param_t dst_param;
3339
3340 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3341
3342 /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0.
3343 * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but
3344 * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1
3345 * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result
3346 * >= 1.0 or < 0.0
3347 */
3348 shader_addline(ins->ctx->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n",
3349 dst_param.reg_name, dst_param.reg_name);
3350}
3351
3352/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in GLSL:
3353 * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
3354 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz; (tmp0.x has already been calculated)
3355 * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
3356 */
3357static void shader_glsl_texm3x2depth(const struct wined3d_shader_instruction *ins)
3358{
3359 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3360 DWORD dstreg = ins->dst[0].reg.idx;
3361 glsl_src_param_t src0_param;
3362
3363 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3364
3365 shader_addline(ins->ctx->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str);
3366 shader_addline(ins->ctx->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
3367}
3368
3369/** Process the WINED3DSIO_TEXM3X2PAD instruction in GLSL
3370 * Calculate the 1st of a 2-row matrix multiplication. */
3371static void shader_glsl_texm3x2pad(const struct wined3d_shader_instruction *ins)
3372{
3373 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3374 DWORD reg = ins->dst[0].reg.idx;
3375 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3376 glsl_src_param_t src0_param;
3377
3378 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3379 shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3380}
3381
3382/** Process the WINED3DSIO_TEXM3X3PAD instruction in GLSL
3383 * Calculate the 1st or 2nd row of a 3-row matrix multiplication. */
3384static void shader_glsl_texm3x3pad(const struct wined3d_shader_instruction *ins)
3385{
3386 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3387 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3388 DWORD reg = ins->dst[0].reg.idx;
3389 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3390 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3391 glsl_src_param_t src0_param;
3392
3393 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3394 shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + current_state->current_row, reg, src0_param.param_str);
3395 current_state->texcoord_w[current_state->current_row++] = reg;
3396}
3397
3398static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins)
3399{
3400 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3401 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3402 DWORD reg = ins->dst[0].reg.idx;
3403 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3404 glsl_src_param_t src0_param;
3405 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3406 glsl_sample_function_t sample_function;
3407
3408 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3409 shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3410
3411 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3412
3413 /* Sample the texture using the calculated coordinates */
3414 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xy");
3415}
3416
3417/** Process the WINED3DSIO_TEXM3X3TEX instruction in GLSL
3418 * Perform the 3rd row of a 3x3 matrix multiply, then sample the texture using the calculated coordinates */
3419static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins)
3420{
3421 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3422 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3423 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state;
3424 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3425 glsl_src_param_t src0_param;
3426 DWORD reg = ins->dst[0].reg.idx;
3427 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3428 glsl_sample_function_t sample_function;
3429
3430 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3431 shader_addline(ins->ctx->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3432
3433 /* Dependent read, not valid with conditional NP2 */
3434 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3435
3436 /* Sample the texture using the calculated coordinates */
3437 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3438
3439 current_state->current_row = 0;
3440}
3441
3442/** Process the WINED3DSIO_TEXM3X3 instruction in GLSL
3443 * Perform the 3rd row of a 3x3 matrix multiply */
3444static void shader_glsl_texm3x3(const struct wined3d_shader_instruction *ins)
3445{
3446 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3447 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3448 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state;
3449 glsl_src_param_t src0_param;
3450 char dst_mask[6];
3451 DWORD reg = ins->dst[0].reg.idx;
3452
3453 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3454
3455 shader_glsl_append_dst(ins->ctx->buffer, ins);
3456 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3457 shader_addline(ins->ctx->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);
3458
3459 current_state->current_row = 0;
3460}
3461
3462/* Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL
3463 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3464static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins)
3465{
3466 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3467 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3468 DWORD reg = ins->dst[0].reg.idx;
3469 glsl_src_param_t src0_param;
3470 glsl_src_param_t src1_param;
3471 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3472 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3473 WINED3DSAMPLER_TEXTURE_TYPE stype = ins->ctx->reg_maps->sampler_type[reg];
3474 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3475 glsl_sample_function_t sample_function;
3476
3477 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3478 shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
3479
3480 /* Perform the last matrix multiply operation */
3481 shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3482 /* Reflection calculation */
3483 shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str);
3484
3485 /* Dependent read, not valid with conditional NP2 */
3486 shader_glsl_get_sample_function(gl_info, stype, 0, &sample_function);
3487
3488 /* Sample the texture */
3489 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3490
3491 current_state->current_row = 0;
3492}
3493
3494/* Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL
3495 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3496static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *ins)
3497{
3498 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3499 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3500 DWORD reg = ins->dst[0].reg.idx;
3501 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3502 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3503 glsl_src_param_t src0_param;
3504 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3505 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3506 glsl_sample_function_t sample_function;
3507
3508 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3509
3510 /* Perform the last matrix multiply operation */
3511 shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str);
3512
3513 /* Construct the eye-ray vector from w coordinates */
3514 shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
3515 current_state->texcoord_w[0], current_state->texcoord_w[1], reg);
3516 shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n");
3517
3518 /* Dependent read, not valid with conditional NP2 */
3519 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3520
3521 /* Sample the texture using the calculated coordinates */
3522 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3523
3524 current_state->current_row = 0;
3525}
3526
3527/** Process the WINED3DSIO_TEXBEM instruction in GLSL.
3528 * Apply a fake bump map transform.
3529 * texbem is pshader <= 1.3 only, this saves a few version checks
3530 */
3531static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins)
3532{
3533 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3534 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device;
3535 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3536 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3537 glsl_sample_function_t sample_function;
3538 glsl_src_param_t coord_param;
3539 WINED3DSAMPLER_TEXTURE_TYPE sampler_type;
3540 DWORD sampler_idx;
3541 DWORD mask;
3542 DWORD flags;
3543 char coord_mask[6];
3544
3545 sampler_idx = ins->dst[0].reg.idx;
3546 flags = (priv->cur_ps_args->tex_transform >> (sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT))
3547 & WINED3D_PSARGS_TEXTRANSFORM_MASK;
3548
3549 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3550 /* Dependent read, not valid with conditional NP2 */
3551 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3552 mask = sample_function.coord_mask;
3553
3554 shader_glsl_write_mask_to_str(mask, coord_mask);
3555
3556 /* with projective textures, texbem only divides the static texture coord, not the displacement,
3557 * so we can't let the GL handle this.
3558 */
3559 if (flags & WINED3D_PSARGS_PROJECTED) {
3560 DWORD div_mask=0;
3561 char coord_div_mask[3];
3562 switch (flags & ~WINED3D_PSARGS_PROJECTED) {
3563 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
3564 case WINED3DTTFF_COUNT2: div_mask = WINED3DSP_WRITEMASK_1; break;
3565 case WINED3DTTFF_COUNT3: div_mask = WINED3DSP_WRITEMASK_2; break;
3566 case WINED3DTTFF_COUNT4:
3567 case WINED3DTTFF_DISABLE: div_mask = WINED3DSP_WRITEMASK_3; break;
3568 }
3569 shader_glsl_write_mask_to_str(div_mask, coord_div_mask);
3570 shader_addline(ins->ctx->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
3571 }
3572
3573 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param);
3574
3575 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3576 "T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx,
3577 coord_param.param_str, coord_mask);
3578
3579 if (ins->handler_idx == WINED3DSIH_TEXBEML)
3580 {
3581 glsl_src_param_t luminance_param;
3582 glsl_dst_param_t dst_param;
3583
3584 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &luminance_param);
3585 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3586
3587 shader_addline(ins->ctx->buffer, "%s%s *= (%s * luminancescale%d + luminanceoffset%d);\n",
3588 dst_param.reg_name, dst_param.mask_str,
3589 luminance_param.param_str, sampler_idx, sampler_idx);
3590 }
3591}
3592
3593static void shader_glsl_bem(const struct wined3d_shader_instruction *ins)
3594{
3595 glsl_src_param_t src0_param, src1_param;
3596 DWORD sampler_idx = ins->dst[0].reg.idx;
3597
3598 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3599 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3600
3601 shader_glsl_append_dst(ins->ctx->buffer, ins);
3602 shader_addline(ins->ctx->buffer, "%s + bumpenvmat%d * %s);\n",
3603 src0_param.param_str, sampler_idx, src1_param.param_str);
3604}
3605
3606/** Process the WINED3DSIO_TEXREG2AR instruction in GLSL
3607 * Sample 2D texture at dst using the alpha & red (wx) components of src as texture coordinates */
3608static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins)
3609{
3610 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3611 glsl_src_param_t src0_param;
3612 DWORD sampler_idx = ins->dst[0].reg.idx;
3613 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3614 glsl_sample_function_t sample_function;
3615
3616 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
3617
3618 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3619 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3620 "%s.wx", src0_param.reg_name);
3621}
3622
3623/** Process the WINED3DSIO_TEXREG2GB instruction in GLSL
3624 * Sample 2D texture at dst using the green & blue (yz) components of src as texture coordinates */
3625static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins)
3626{
3627 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3628 glsl_src_param_t src0_param;
3629 DWORD sampler_idx = ins->dst[0].reg.idx;
3630 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3631 glsl_sample_function_t sample_function;
3632
3633 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
3634
3635 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3636 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3637 "%s.yz", src0_param.reg_name);
3638}
3639
3640/** Process the WINED3DSIO_TEXREG2RGB instruction in GLSL
3641 * Sample texture at dst using the rgb (xyz) components of src as texture coordinates */
3642static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins)
3643{
3644 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3645 glsl_src_param_t src0_param;
3646 DWORD sampler_idx = ins->dst[0].reg.idx;
3647 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3648 glsl_sample_function_t sample_function;
3649
3650 /* Dependent read, not valid with conditional NP2 */
3651 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3652 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param);
3653
3654 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3655 "%s", src0_param.param_str);
3656}
3657
3658/** Process the WINED3DSIO_TEXKILL instruction in GLSL.
3659 * If any of the first 3 components are < 0, discard this pixel */
3660static void shader_glsl_texkill(const struct wined3d_shader_instruction *ins)
3661{
3662 glsl_dst_param_t dst_param;
3663
3664 /* The argument is a destination parameter, and no writemasks are allowed */
3665 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3666 if (ins->ctx->reg_maps->shader_version.major >= 2)
3667 {
3668 /* 2.0 shaders compare all 4 components in texkill */
3669 shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name);
3670 } else {
3671 /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill
3672 * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all
3673 * 4 components are defined, only the first 3 are used
3674 */
3675 shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name);
3676 }
3677}
3678
3679/** Process the WINED3DSIO_DP2ADD instruction in GLSL.
3680 * dst = dot2(src0, src1) + src2 */
3681static void shader_glsl_dp2add(const struct wined3d_shader_instruction *ins)
3682{
3683 glsl_src_param_t src0_param;
3684 glsl_src_param_t src1_param;
3685 glsl_src_param_t src2_param;
3686 DWORD write_mask;
3687 unsigned int mask_size;
3688
3689 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3690 mask_size = shader_glsl_get_write_mask_size(write_mask);
3691
3692 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3693 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3694 shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &src2_param);
3695
3696 if (mask_size > 1) {
3697 shader_addline(ins->ctx->buffer, "vec%d(dot(%s, %s) + %s));\n",
3698 mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
3699 } else {
3700 shader_addline(ins->ctx->buffer, "dot(%s, %s) + %s);\n",
3701 src0_param.param_str, src1_param.param_str, src2_param.param_str);
3702 }
3703}
3704
3705static void shader_glsl_input_pack(IWineD3DPixelShader *iface, struct wined3d_shader_buffer *buffer,
3706 const struct wined3d_shader_signature_element *input_signature, const struct shader_reg_maps *reg_maps,
3707 enum vertexprocessing_mode vertexprocessing)
3708{
3709 unsigned int i;
3710 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
3711 WORD map = reg_maps->input_registers;
3712
3713 for (i = 0; map; map >>= 1, ++i)
3714 {
3715 const char *semantic_name;
3716 UINT semantic_idx;
3717 char reg_mask[6];
3718
3719 /* Unused */
3720 if (!(map & 1)) continue;
3721
3722 semantic_name = input_signature[i].semantic_name;
3723 semantic_idx = input_signature[i].semantic_idx;
3724 shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
3725
3726 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
3727 {
3728 if (semantic_idx < 8 && vertexprocessing == pretransformed)
3729 shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n",
3730 This->input_reg_map[i], reg_mask, semantic_idx, reg_mask);
3731 else
3732 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3733 This->input_reg_map[i], reg_mask, reg_mask);
3734 }
3735 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
3736 {
3737 if (semantic_idx == 0)
3738 shader_addline(buffer, "IN[%u]%s = vec4(gl_Color)%s;\n",
3739 This->input_reg_map[i], reg_mask, reg_mask);
3740 else if (semantic_idx == 1)
3741 shader_addline(buffer, "IN[%u]%s = vec4(gl_SecondaryColor)%s;\n",
3742 This->input_reg_map[i], reg_mask, reg_mask);
3743 else
3744 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3745 This->input_reg_map[i], reg_mask, reg_mask);
3746 }
3747 else
3748 {
3749 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3750 This->input_reg_map[i], reg_mask, reg_mask);
3751 }
3752 }
3753}
3754
3755/*********************************************
3756 * Vertex Shader Specific Code begins here
3757 ********************************************/
3758
3759static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry) {
3760 glsl_program_key_t key;
3761
3762 key.vshader = entry->vshader;
3763 key.pshader = entry->pshader;
3764 key.vs_args = entry->vs_args;
3765 key.ps_args = entry->ps_args;
3766 key.context = entry->context;
3767
3768 if (wine_rb_put(&priv->program_lookup, &key, &entry->program_lookup_entry) == -1)
3769 {
3770 ERR("Failed to insert program entry.\n");
3771 }
3772}
3773
3774static struct glsl_shader_prog_link *get_glsl_program_entry(struct shader_glsl_priv *priv,
3775 IWineD3DVertexShader *vshader, IWineD3DPixelShader *pshader, struct vs_compile_args *vs_args,
3776 struct ps_compile_args *ps_args, const struct wined3d_context *context) {
3777 struct wine_rb_entry *entry;
3778 glsl_program_key_t key;
3779
3780 key.vshader = vshader;
3781 key.pshader = pshader;
3782 key.vs_args = *vs_args;
3783 key.ps_args = *ps_args;
3784 key.context = context;
3785
3786 entry = wine_rb_get(&priv->program_lookup, &key);
3787 return entry ? WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry) : NULL;
3788}
3789
3790/* GL locking is done by the caller */
3791static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const struct wined3d_gl_info *gl_info,
3792 struct glsl_shader_prog_link *entry)
3793{
3794 glsl_program_key_t key;
3795
3796 key.vshader = entry->vshader;
3797 key.pshader = entry->pshader;
3798 key.vs_args = entry->vs_args;
3799 key.ps_args = entry->ps_args;
3800 key.context = entry->context;
3801 wine_rb_remove(&priv->program_lookup, &key);
3802
3803 if (context_get_current() == entry->context)
3804 {
3805 TRACE("deleting program %u\n", entry->programId);
3806 GL_EXTCALL(glDeleteObjectARB(entry->programId));
3807 checkGLcall("glDeleteObjectARB");
3808 }
3809 else
3810 {
3811 WARN("Attempting to delete program %u created in ctx %p from ctx %p\n", entry->programId, entry->context, context_get_current());
3812 }
3813
3814 if (entry->vshader) list_remove(&entry->vshader_entry);
3815 if (entry->pshader) list_remove(&entry->pshader_entry);
3816 HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations);
3817 HeapFree(GetProcessHeap(), 0, entry->puniformF_locations);
3818 HeapFree(GetProcessHeap(), 0, entry);
3819}
3820
3821static void handle_ps3_input(struct wined3d_shader_buffer *buffer, const struct wined3d_gl_info *gl_info, const DWORD *map,
3822 const struct wined3d_shader_signature_element *input_signature, const struct shader_reg_maps *reg_maps_in,
3823 const struct wined3d_shader_signature_element *output_signature, const struct shader_reg_maps *reg_maps_out)
3824{
3825 unsigned int i, j;
3826 const char *semantic_name_in, *semantic_name_out;
3827 UINT semantic_idx_in, semantic_idx_out;
3828 DWORD *set;
3829 DWORD in_idx;
3830 unsigned int in_count = vec4_varyings(3, gl_info);
3831 char reg_mask[6], reg_mask_out[6];
3832 char destination[50];
3833 WORD input_map, output_map;
3834
3835 set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2));
3836
3837 if (!output_signature)
3838 {
3839 /* Save gl_FrontColor & gl_FrontSecondaryColor before overwriting them. */
3840 shader_addline(buffer, "vec4 front_color = gl_FrontColor;\n");
3841 shader_addline(buffer, "vec4 front_secondary_color = gl_FrontSecondaryColor;\n");
3842 }
3843
3844 input_map = reg_maps_in->input_registers;
3845 for (i = 0; input_map; input_map >>= 1, ++i)
3846 {
3847 if (!(input_map & 1)) continue;
3848
3849 in_idx = map[i];
3850 if (in_idx >= (in_count + 2)) {
3851 FIXME("More input varyings declared than supported, expect issues\n");
3852 continue;
3853 }
3854 else if (map[i] == ~0U)
3855 {
3856 /* Declared, but not read register */
3857 continue;
3858 }
3859
3860 if (in_idx == in_count) {
3861 sprintf(destination, "gl_FrontColor");
3862 } else if (in_idx == in_count + 1) {
3863 sprintf(destination, "gl_FrontSecondaryColor");
3864 } else {
3865 sprintf(destination, "IN[%u]", in_idx);
3866 }
3867
3868 semantic_name_in = input_signature[i].semantic_name;
3869 semantic_idx_in = input_signature[i].semantic_idx;
3870 set[map[i]] = input_signature[i].mask;
3871 shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
3872
3873 if (!output_signature)
3874 {
3875 if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_COLOR))
3876 {
3877 if (semantic_idx_in == 0)
3878 shader_addline(buffer, "%s%s = front_color%s;\n",
3879 destination, reg_mask, reg_mask);
3880 else if (semantic_idx_in == 1)
3881 shader_addline(buffer, "%s%s = front_secondary_color%s;\n",
3882 destination, reg_mask, reg_mask);
3883 else
3884 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3885 destination, reg_mask, reg_mask);
3886 }
3887 else if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_TEXCOORD))
3888 {
3889 if (semantic_idx_in < 8)
3890 {
3891 shader_addline(buffer, "%s%s = gl_TexCoord[%u]%s;\n",
3892 destination, reg_mask, semantic_idx_in, reg_mask);
3893 }
3894 else
3895 {
3896 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3897 destination, reg_mask, reg_mask);
3898 }
3899 }
3900 else if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_FOG))
3901 {
3902 shader_addline(buffer, "%s%s = vec4(gl_FogFragCoord, 0.0, 0.0, 0.0)%s;\n",
3903 destination, reg_mask, reg_mask);
3904 }
3905 else
3906 {
3907 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3908 destination, reg_mask, reg_mask);
3909 }
3910 } else {
3911 BOOL found = FALSE;
3912
3913 output_map = reg_maps_out->output_registers;
3914 for (j = 0; output_map; output_map >>= 1, ++j)
3915 {
3916 if (!(output_map & 1)) continue;
3917
3918 semantic_name_out = output_signature[j].semantic_name;
3919 semantic_idx_out = output_signature[j].semantic_idx;
3920 shader_glsl_write_mask_to_str(output_signature[j].mask, reg_mask_out);
3921
3922 if (semantic_idx_in == semantic_idx_out
3923 && !strcmp(semantic_name_in, semantic_name_out))
3924 {
3925 shader_addline(buffer, "%s%s = OUT[%u]%s;\n",
3926 destination, reg_mask, j, reg_mask);
3927 found = TRUE;
3928 }
3929 }
3930 if(!found) {
3931 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3932 destination, reg_mask, reg_mask);
3933 }
3934 }
3935 }
3936
3937 /* This is solely to make the compiler / linker happy and avoid warning about undefined
3938 * varyings. It shouldn't result in any real code executed on the GPU, since all read
3939 * input varyings are assigned above, if the optimizer works properly.
3940 */
3941 for(i = 0; i < in_count + 2; i++) {
3942 if (set[i] && set[i] != WINED3DSP_WRITEMASK_ALL)
3943 {
3944 unsigned int size = 0;
3945 memset(reg_mask, 0, sizeof(reg_mask));
3946 if(!(set[i] & WINED3DSP_WRITEMASK_0)) {
3947 reg_mask[size] = 'x';
3948 size++;
3949 }
3950 if(!(set[i] & WINED3DSP_WRITEMASK_1)) {
3951 reg_mask[size] = 'y';
3952 size++;
3953 }
3954 if(!(set[i] & WINED3DSP_WRITEMASK_2)) {
3955 reg_mask[size] = 'z';
3956 size++;
3957 }
3958 if(!(set[i] & WINED3DSP_WRITEMASK_3)) {
3959 reg_mask[size] = 'w';
3960 size++;
3961 }
3962
3963 if (i == in_count) {
3964 sprintf(destination, "gl_FrontColor");
3965 } else if (i == in_count + 1) {
3966 sprintf(destination, "gl_FrontSecondaryColor");
3967 } else {
3968 sprintf(destination, "IN[%u]", i);
3969 }
3970
3971 if (size == 1) {
3972 shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask);
3973 } else {
3974 shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size);
3975 }
3976 }
3977 }
3978
3979 HeapFree(GetProcessHeap(), 0, set);
3980}
3981
3982static void generate_texcoord_assignment(struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *vs, IWineD3DPixelShaderImpl *ps)
3983{
3984 DWORD map;
3985 unsigned int i;
3986 char reg_mask[6];
3987
3988 if (!ps)
3989 return;
3990
3991 for (i = 0, map = ps->baseShader.reg_maps.texcoord; map && i < min(8, MAX_REG_TEXCRD); map >>= 1, ++i)
3992 {
3993 if (!map & 1)
3994 continue;
3995
3996 /* so far we assume that if texcoord_mask has any write flags, they are assigned appropriately with pixel shader */
3997 if ((vs->baseShader.reg_maps.texcoord_mask[i]) & WINED3DSP_WRITEMASK_ALL)
3998 continue;
3999
4000 shader_glsl_write_mask_to_str(WINED3DSP_WRITEMASK_ALL, reg_mask);
4001 shader_addline(buffer, "gl_TexCoord[%u]%s = gl_MultiTexCoord%u%s;\n", i, reg_mask, i, reg_mask);
4002 }
4003}
4004
4005/* GL locking is done by the caller */
4006static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer *buffer,
4007 IWineD3DVertexShader *vertexshader, IWineD3DPixelShader *pixelshader, const struct wined3d_gl_info *gl_info)
4008{
4009 GLhandleARB ret = 0;
4010 IWineD3DVertexShaderImpl *vs = (IWineD3DVertexShaderImpl *) vertexshader;
4011 IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) pixelshader;
4012 IWineD3DDeviceImpl *device;
4013 DWORD vs_major = vs->baseShader.reg_maps.shader_version.major;
4014 DWORD ps_major = ps ? ps->baseShader.reg_maps.shader_version.major : 0;
4015 unsigned int i;
4016 const char *semantic_name;
4017 UINT semantic_idx;
4018 char reg_mask[6];
4019 const struct wined3d_shader_signature_element *output_signature;
4020
4021 shader_buffer_clear(buffer);
4022
4023 shader_addline(buffer, "#version 120\n");
4024
4025 if(vs_major < 3 && ps_major < 3) {
4026 /* That one is easy: The vertex shader writes to the builtin varyings, the pixel shader reads from them.
4027 * Take care about the texcoord .w fixup though if we're using the fixed function fragment pipeline
4028 */
4029 device = (IWineD3DDeviceImpl *) vs->baseShader.device;
4030 if ((gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W)
4031 && ps_major == 0 && vs_major > 0 && !device->frag_pipe->ffp_proj_control)
4032 {
4033 shader_addline(buffer, "void order_ps_input() {\n");
4034 for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
4035 if(vs->baseShader.reg_maps.texcoord_mask[i] != 0 &&
4036 vs->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
4037 shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", i);
4038 }
4039 }
4040 shader_addline(buffer, "}\n");
4041 } else {
4042 shader_addline(buffer, "void order_ps_input() {\n");
4043 generate_texcoord_assignment(buffer, vs, ps);
4044 shader_addline(buffer, "}\n");
4045 }
4046 } else if(ps_major < 3 && vs_major >= 3) {
4047 WORD map = vs->baseShader.reg_maps.output_registers;
4048
4049 /* The vertex shader writes to its own varyings, the pixel shader needs them in the builtin ones */
4050 output_signature = vs->baseShader.output_signature;
4051
4052 shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
4053 for (i = 0; map; map >>= 1, ++i)
4054 {
4055 DWORD write_mask;
4056
4057 if (!(map & 1)) continue;
4058
4059 semantic_name = output_signature[i].semantic_name;
4060 semantic_idx = output_signature[i].semantic_idx;
4061 write_mask = output_signature[i].mask;
4062 shader_glsl_write_mask_to_str(write_mask, reg_mask);
4063
4064 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
4065 {
4066 if (semantic_idx == 0)
4067 shader_addline(buffer, "gl_FrontColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4068 else if (semantic_idx == 1)
4069 shader_addline(buffer, "gl_FrontSecondaryColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4070 }
4071 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
4072 {
4073 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4074 }
4075 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
4076 {
4077 if (semantic_idx < 8)
4078 {
4079 if (!(gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) || ps_major > 0)
4080 write_mask |= WINED3DSP_WRITEMASK_3;
4081
4082 shader_addline(buffer, "gl_TexCoord[%u]%s = OUT[%u]%s;\n",
4083 semantic_idx, reg_mask, i, reg_mask);
4084 if (!(write_mask & WINED3DSP_WRITEMASK_3))
4085 shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", semantic_idx);
4086 }
4087 }
4088 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
4089 {
4090 shader_addline(buffer, "gl_PointSize = OUT[%u].x;\n", i);
4091 }
4092 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG))
4093 {
4094 shader_addline(buffer, "gl_FogFragCoord = OUT[%u].%c;\n", i, reg_mask[1]);
4095 }
4096 }
4097 shader_addline(buffer, "}\n");
4098
4099 } else if(ps_major >= 3 && vs_major >= 3) {
4100 WORD map = vs->baseShader.reg_maps.output_registers;
4101
4102 output_signature = vs->baseShader.output_signature;
4103
4104 /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */
4105 shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(3, gl_info));
4106 shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
4107
4108 /* First, sort out position and point size. Those are not passed to the pixel shader */
4109 for (i = 0; map; map >>= 1, ++i)
4110 {
4111 if (!(map & 1)) continue;
4112
4113 semantic_name = output_signature[i].semantic_name;
4114 shader_glsl_write_mask_to_str(output_signature[i].mask, reg_mask);
4115
4116 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
4117 {
4118 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4119 }
4120 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
4121 {
4122 shader_addline(buffer, "gl_PointSize = OUT[%u].x;\n", i);
4123 }
4124 }
4125
4126 /* Then, fix the pixel shader input */
4127 handle_ps3_input(buffer, gl_info, ps->input_reg_map, ps->baseShader.input_signature,
4128 &ps->baseShader.reg_maps, output_signature, &vs->baseShader.reg_maps);
4129
4130 shader_addline(buffer, "}\n");
4131 } else if(ps_major >= 3 && vs_major < 3) {
4132 shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(3, gl_info));
4133 shader_addline(buffer, "void order_ps_input() {\n");
4134 /* The vertex shader wrote to the builtin varyings. There is no need to figure out position and
4135 * point size, but we depend on the optimizers kindness to find out that the pixel shader doesn't
4136 * read gl_TexCoord and gl_ColorX, otherwise we'll run out of varyings
4137 */
4138 handle_ps3_input(buffer, gl_info, ps->input_reg_map, ps->baseShader.input_signature,
4139 &ps->baseShader.reg_maps, NULL, NULL);
4140 shader_addline(buffer, "}\n");
4141 } else {
4142 ERR("Unexpected vertex and pixel shader version condition: vs: %d, ps: %d\n", vs_major, ps_major);
4143 }
4144
4145 ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4146 checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");
4147 GL_EXTCALL(glShaderSourceARB(ret, 1, (const char**)&buffer->buffer, NULL));
4148 checkGLcall("glShaderSourceARB(ret, 1, &buffer->buffer, NULL)");
4149 GL_EXTCALL(glCompileShaderARB(ret));
4150 checkGLcall("glCompileShaderARB(ret)");
4151 shader_glsl_validate_compile_link(gl_info, ret, FALSE);
4152 return ret;
4153}
4154
4155/* GL locking is done by the caller */
4156static void hardcode_local_constants(IWineD3DBaseShaderImpl *shader, const struct wined3d_gl_info *gl_info,
4157 GLhandleARB programId, char prefix)
4158{
4159 const local_constant *lconst;
4160 GLint tmp_loc;
4161 const float *value;
4162 char glsl_name[8];
4163
4164 LIST_FOR_EACH_ENTRY(lconst, &shader->baseShader.constantsF, local_constant, entry) {
4165 value = (const float *)lconst->value;
4166 snprintf(glsl_name, sizeof(glsl_name), "%cLC%u", prefix, lconst->idx);
4167 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4168 GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
4169 }
4170 checkGLcall("Hardcoding local constants");
4171}
4172
4173/* GL locking is done by the caller */
4174static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context,
4175 struct wined3d_shader_buffer *buffer, IWineD3DPixelShaderImpl *This,
4176 const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info)
4177{
4178 const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
4179 const struct wined3d_gl_info *gl_info = context->gl_info;
4180 CONST DWORD *function = This->baseShader.function;
4181 struct shader_glsl_ctx_priv priv_ctx;
4182
4183 /* Create the hw GLSL shader object and assign it as the shader->prgId */
4184 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
4185
4186 memset(&priv_ctx, 0, sizeof(priv_ctx));
4187 priv_ctx.cur_ps_args = args;
4188 priv_ctx.cur_np2fixup_info = np2fixup_info;
4189
4190 shader_addline(buffer, "#version 120\n");
4191
4192 if (gl_info->supported[ARB_SHADER_TEXTURE_LOD] && reg_maps->usestexldd)
4193 {
4194 shader_addline(buffer, "#extension GL_ARB_shader_texture_lod : enable\n");
4195 }
4196 if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
4197 {
4198 /* The spec says that it doesn't have to be explicitly enabled, but the nvidia
4199 * drivers write a warning if we don't do so
4200 */
4201 shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
4202 }
4203 if (gl_info->supported[EXT_GPU_SHADER4])
4204 {
4205 shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4206 }
4207
4208 /* Base Declarations */
4209 shader_generate_glsl_declarations(context, buffer, (IWineD3DBaseShader *)This, reg_maps, &priv_ctx);
4210
4211 /* Pack 3.0 inputs */
4212 if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader)
4213 {
4214 shader_glsl_input_pack((IWineD3DPixelShader *) This, buffer,
4215 This->baseShader.input_signature, reg_maps, args->vp_mode);
4216 }
4217
4218 /* Base Shader Body */
4219 shader_generate_main((IWineD3DBaseShader *)This, buffer, reg_maps, function, &priv_ctx);
4220
4221 /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */
4222 if (reg_maps->shader_version.major < 2)
4223 {
4224 /* Some older cards like GeforceFX ones don't support multiple buffers, so also not gl_FragData */
4225 shader_addline(buffer, "gl_FragData[0] = R0;\n");
4226 }
4227
4228 if (args->srgb_correction)
4229 {
4230 shader_addline(buffer, "tmp0.xyz = pow(gl_FragData[0].xyz, vec3(srgb_const0.x));\n");
4231 shader_addline(buffer, "tmp0.xyz = tmp0.xyz * vec3(srgb_const0.y) - vec3(srgb_const0.z);\n");
4232 shader_addline(buffer, "tmp1.xyz = gl_FragData[0].xyz * vec3(srgb_const0.w);\n");
4233 shader_addline(buffer, "bvec3 srgb_compare = lessThan(gl_FragData[0].xyz, vec3(srgb_const1.x));\n");
4234 shader_addline(buffer, "gl_FragData[0].xyz = mix(tmp0.xyz, tmp1.xyz, vec3(srgb_compare));\n");
4235 shader_addline(buffer, "gl_FragData[0] = clamp(gl_FragData[0], 0.0, 1.0);\n");
4236 }
4237 /* Pixel shader < 3.0 do not replace the fog stage.
4238 * This implements linear fog computation and blending.
4239 * TODO: non linear fog
4240 * NOTE: gl_Fog.start and gl_Fog.end don't hold fog start s and end e but
4241 * -1/(e-s) and e/(e-s) respectively.
4242 */
4243 if (reg_maps->shader_version.major < 3)
4244 {
4245 switch(args->fog) {
4246 case FOG_OFF: break;
4247 case FOG_LINEAR:
4248 shader_addline(buffer, "float fogstart = -1.0 / (gl_Fog.end - gl_Fog.start);\n");
4249 shader_addline(buffer, "float fogend = gl_Fog.end * -fogstart;\n");
4250 shader_addline(buffer, "float Fog = clamp(gl_FogFragCoord * fogstart + fogend, 0.0, 1.0);\n");
4251 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4252 break;
4253 case FOG_EXP:
4254 /* Fog = e^(-gl_Fog.density * gl_FogFragCoord) */
4255 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_FogFragCoord);\n");
4256 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
4257 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4258 break;
4259 case FOG_EXP2:
4260 /* Fog = e^(-(gl_Fog.density * gl_FogFragCoord)^2) */
4261 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_Fog.density * gl_FogFragCoord * gl_FogFragCoord);\n");
4262 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
4263 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4264 break;
4265 }
4266 }
4267
4268 shader_addline(buffer, "}\n");
4269
4270 TRACE("Compiling shader object %u\n", shader_obj);
4271 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
4272 GL_EXTCALL(glCompileShaderARB(shader_obj));
4273 shader_glsl_validate_compile_link(gl_info, shader_obj, FALSE);
4274
4275 /* Store the shader object */
4276 return shader_obj;
4277}
4278
4279/* GL locking is done by the caller */
4280static GLuint shader_glsl_generate_vshader(const struct wined3d_context *context,
4281 struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *This,
4282 const struct vs_compile_args *args)
4283{
4284 const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
4285 const struct wined3d_gl_info *gl_info = context->gl_info;
4286 CONST DWORD *function = This->baseShader.function;
4287 struct shader_glsl_ctx_priv priv_ctx;
4288
4289 /* Create the hw GLSL shader program and assign it as the shader->prgId */
4290 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4291
4292 shader_addline(buffer, "#version 120\n");
4293
4294 if (gl_info->supported[EXT_GPU_SHADER4])
4295 {
4296 shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4297 }
4298
4299 memset(&priv_ctx, 0, sizeof(priv_ctx));
4300 priv_ctx.cur_vs_args = args;
4301
4302 /* Base Declarations */
4303 shader_generate_glsl_declarations(context, buffer, (IWineD3DBaseShader *)This, reg_maps, &priv_ctx);
4304
4305 /* Base Shader Body */
4306 shader_generate_main((IWineD3DBaseShader*)This, buffer, reg_maps, function, &priv_ctx);
4307
4308 /* Unpack 3.0 outputs */
4309 if (reg_maps->shader_version.major >= 3) shader_addline(buffer, "order_ps_input(OUT);\n");
4310 else shader_addline(buffer, "order_ps_input();\n");
4311
4312 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
4313 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
4314 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
4315 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
4316 */
4317 if(args->fog_src == VS_FOG_Z) {
4318 shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n");
4319 } else if (!reg_maps->fog) {
4320 shader_addline(buffer, "gl_FogFragCoord = 0.0;\n");
4321 }
4322
4323 /* Write the final position.
4324 *
4325 * OpenGL coordinates specify the center of the pixel while d3d coords specify
4326 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
4327 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
4328 * contains 1.0 to allow a mad.
4329 */
4330 shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
4331 shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
4332 if(args->clip_enabled) {
4333 shader_addline(buffer, "gl_ClipVertex = gl_Position;\n");
4334 }
4335
4336 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
4337 *
4338 * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
4339 * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
4340 * which is the same as z = z * 2 - w.
4341 */
4342 shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
4343
4344 shader_addline(buffer, "}\n");
4345
4346 TRACE("Compiling shader object %u\n", shader_obj);
4347 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
4348 GL_EXTCALL(glCompileShaderARB(shader_obj));
4349 shader_glsl_validate_compile_link(gl_info, shader_obj, FALSE);
4350
4351 return shader_obj;
4352}
4353
4354static GLhandleARB find_glsl_pshader(const struct wined3d_context *context,
4355 struct wined3d_shader_buffer *buffer, IWineD3DPixelShaderImpl *shader,
4356 const struct ps_compile_args *args,
4357 UINT *inp2fixup_info
4358 )
4359{
4360 UINT i;
4361 DWORD new_size;
4362 struct glsl_ps_compiled_shader *new_array;
4363 struct glsl_pshader_private *shader_data;
4364 struct ps_np2fixup_info *np2fixup = NULL;
4365 GLhandleARB ret;
4366
4367 if (!shader->baseShader.backend_data)
4368 {
4369 shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4370 if (!shader->baseShader.backend_data)
4371 {
4372 ERR("Failed to allocate backend data.\n");
4373 return 0;
4374 }
4375 }
4376 shader_data = shader->baseShader.backend_data;
4377
4378 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4379 * so a linear search is more performant than a hashmap or a binary search
4380 * (cache coherency etc)
4381 */
4382 for(i = 0; i < shader_data->num_gl_shaders; i++) {
4383 if(shader_data->gl_shaders[i].context==context
4384 && memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)) == 0) {
4385 if(args->np2_fixup) {
4386 *inp2fixup_info = i;
4387 }
4388 return shader_data->gl_shaders[i].prgId;
4389 }
4390 }
4391
4392 TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4393 if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4394 if (shader_data->num_gl_shaders)
4395 {
4396 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4397 new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
4398 new_size * sizeof(*shader_data->gl_shaders));
4399 } else {
4400 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
4401 new_size = 1;
4402 }
4403
4404 if(!new_array) {
4405 ERR("Out of memory\n");
4406 return 0;
4407 }
4408 shader_data->gl_shaders = new_array;
4409 shader_data->shader_array_size = new_size;
4410 }
4411
4412 shader_data->gl_shaders[shader_data->num_gl_shaders].context = context;
4413 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4414
4415 memset(&shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup, 0, sizeof(struct ps_np2fixup_info));
4416 if (args->np2_fixup) np2fixup = &shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup;
4417
4418 pixelshader_update_samplers(&shader->baseShader.reg_maps,
4419 ((IWineD3DDeviceImpl *)shader->baseShader.device)->stateBlock->textures);
4420
4421 shader_buffer_clear(buffer);
4422 ret = shader_glsl_generate_pshader(context, buffer, shader, args, np2fixup);
4423 *inp2fixup_info = shader_data->num_gl_shaders;
4424 shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4425
4426 return ret;
4427}
4428
4429static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
4430 const DWORD use_map) {
4431 if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
4432 if((stored->clip_enabled) != new->clip_enabled) return FALSE;
4433 return stored->fog_src == new->fog_src;
4434}
4435
4436static GLhandleARB find_glsl_vshader(const struct wined3d_context *context,
4437 struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *shader,
4438 const struct vs_compile_args *args)
4439{
4440 UINT i;
4441 DWORD new_size;
4442 struct glsl_vs_compiled_shader *new_array;
4443 DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map;
4444 struct glsl_vshader_private *shader_data;
4445 GLhandleARB ret;
4446
4447 if (!shader->baseShader.backend_data)
4448 {
4449 shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4450 if (!shader->baseShader.backend_data)
4451 {
4452 ERR("Failed to allocate backend data.\n");
4453 return 0;
4454 }
4455 }
4456 shader_data = shader->baseShader.backend_data;
4457
4458 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4459 * so a linear search is more performant than a hashmap or a binary search
4460 * (cache coherency etc)
4461 */
4462 for(i = 0; i < shader_data->num_gl_shaders; i++) {
4463 if(shader_data->gl_shaders[i].context==context
4464 && vs_args_equal(&shader_data->gl_shaders[i].args, args, use_map)) {
4465 return shader_data->gl_shaders[i].prgId;
4466 }
4467 }
4468
4469 TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4470
4471 if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4472 if (shader_data->num_gl_shaders)
4473 {
4474 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4475 new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
4476 new_size * sizeof(*shader_data->gl_shaders));
4477 } else {
4478 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
4479 new_size = 1;
4480 }
4481
4482 if(!new_array) {
4483 ERR("Out of memory\n");
4484 return 0;
4485 }
4486 shader_data->gl_shaders = new_array;
4487 shader_data->shader_array_size = new_size;
4488 }
4489
4490 shader_data->gl_shaders[shader_data->num_gl_shaders].context = context;
4491 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4492
4493 shader_buffer_clear(buffer);
4494 ret = shader_glsl_generate_vshader(context, buffer, shader, args);
4495 shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4496
4497 return ret;
4498}
4499
4500/** Sets the GLSL program ID for the given pixel and vertex shader combination.
4501 * It sets the programId on the current StateBlock (because it should be called
4502 * inside of the DrawPrimitive() part of the render loop).
4503 *
4504 * If a program for the given combination does not exist, create one, and store
4505 * the program in the hash table. If it creates a program, it will link the
4506 * given objects, too.
4507 */
4508
4509/* GL locking is done by the caller */
4510static void set_glsl_shader_program(const struct wined3d_context *context,
4511 IWineD3DDeviceImpl *device, BOOL use_ps, BOOL use_vs)
4512{
4513 IWineD3DVertexShader *vshader = use_vs ? device->stateBlock->vertexShader : NULL;
4514 IWineD3DPixelShader *pshader = use_ps ? device->stateBlock->pixelShader : NULL;
4515 const struct wined3d_gl_info *gl_info = context->gl_info;
4516 struct shader_glsl_priv *priv = device->shader_priv;
4517 struct glsl_shader_prog_link *entry = NULL;
4518 GLhandleARB programId = 0;
4519 GLhandleARB reorder_shader_id = 0;
4520 unsigned int i;
4521 char glsl_name[8];
4522 struct ps_compile_args ps_compile_args;
4523 struct vs_compile_args vs_compile_args;
4524
4525 if (vshader) find_vs_compile_args((IWineD3DVertexShaderImpl *)vshader, device->stateBlock, &vs_compile_args);
4526 if (pshader) find_ps_compile_args((IWineD3DPixelShaderImpl *)pshader, device->stateBlock, &ps_compile_args);
4527
4528 entry = get_glsl_program_entry(priv, vshader, pshader, &vs_compile_args, &ps_compile_args, context);
4529 if (entry) {
4530 priv->glsl_program = entry;
4531 return;
4532 }
4533
4534 /* If we get to this point, then no matching program exists, so we create one */
4535 programId = GL_EXTCALL(glCreateProgramObjectARB());
4536 TRACE("Created new GLSL shader program %u\n", programId);
4537
4538 /* Create the entry */
4539 entry = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct glsl_shader_prog_link));
4540 entry->context = context;
4541 entry->programId = programId;
4542 entry->vshader = vshader;
4543 entry->pshader = pshader;
4544 entry->vs_args = vs_compile_args;
4545 entry->ps_args = ps_compile_args;
4546 entry->constant_version = 0;
4547 WINEFIXUPINFO_INIT(entry);
4548 /* Add the hash table entry */
4549 add_glsl_program_entry(priv, entry);
4550
4551 /* Set the current program */
4552 priv->glsl_program = entry;
4553
4554 /* Attach GLSL vshader */
4555 if (vshader)
4556 {
4557 GLhandleARB vshader_id = find_glsl_vshader(context, &priv->shader_buffer,
4558 (IWineD3DVertexShaderImpl *)vshader, &vs_compile_args);
4559 WORD map = ((IWineD3DBaseShaderImpl *)vshader)->baseShader.reg_maps.input_registers;
4560 char tmp_name[10];
4561
4562 reorder_shader_id = generate_param_reorder_function(&priv->shader_buffer, vshader, pshader, gl_info);
4563 TRACE("Attaching GLSL shader object %u to program %u\n", reorder_shader_id, programId);
4564 GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id));
4565 checkGLcall("glAttachObjectARB");
4566 /* Flag the reorder function for deletion, then it will be freed automatically when the program
4567 * is destroyed
4568 */
4569 GL_EXTCALL(glDeleteObjectARB(reorder_shader_id));
4570
4571 TRACE("Attaching GLSL shader object %u to program %u\n", vshader_id, programId);
4572 GL_EXTCALL(glAttachObjectARB(programId, vshader_id));
4573 checkGLcall("glAttachObjectARB");
4574
4575 /* Bind vertex attributes to a corresponding index number to match
4576 * the same index numbers as ARB_vertex_programs (makes loading
4577 * vertex attributes simpler). With this method, we can use the
4578 * exact same code to load the attributes later for both ARB and
4579 * GLSL shaders.
4580 *
4581 * We have to do this here because we need to know the Program ID
4582 * in order to make the bindings work, and it has to be done prior
4583 * to linking the GLSL program. */
4584 for (i = 0; map; map >>= 1, ++i)
4585 {
4586 if (!(map & 1)) continue;
4587
4588 snprintf(tmp_name, sizeof(tmp_name), "attrib%u", i);
4589 GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
4590 }
4591 checkGLcall("glBindAttribLocationARB");
4592
4593 list_add_head(&((IWineD3DBaseShaderImpl *)vshader)->baseShader.linked_programs, &entry->vshader_entry);
4594 }
4595
4596 /* Attach GLSL pshader */
4597 if (pshader)
4598 {
4599 GLhandleARB pshader_id = find_glsl_pshader(context, &priv->shader_buffer,
4600 (IWineD3DPixelShaderImpl *)pshader, &ps_compile_args,
4601 &entry->inp2Fixup_info
4602 );
4603 TRACE("Attaching GLSL shader object %u to program %u\n", pshader_id, programId);
4604 GL_EXTCALL(glAttachObjectARB(programId, pshader_id));
4605 checkGLcall("glAttachObjectARB");
4606
4607 list_add_head(&((IWineD3DBaseShaderImpl *)pshader)->baseShader.linked_programs, &entry->pshader_entry);
4608 }
4609
4610 /* Link the program */
4611 TRACE("Linking GLSL shader program %u\n", programId);
4612 GL_EXTCALL(glLinkProgramARB(programId));
4613 shader_glsl_validate_compile_link(gl_info, programId, TRUE);
4614
4615 entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4616 sizeof(GLhandleARB) * gl_info->limits.glsl_vs_float_constants);
4617 for (i = 0; i < gl_info->limits.glsl_vs_float_constants; ++i)
4618 {
4619 snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i);
4620 entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4621 }
4622 for (i = 0; i < MAX_CONST_I; ++i)
4623 {
4624 snprintf(glsl_name, sizeof(glsl_name), "VI[%i]", i);
4625 entry->vuniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4626 }
4627 entry->puniformF_locations = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4628 sizeof(GLhandleARB) * gl_info->limits.glsl_ps_float_constants);
4629 for (i = 0; i < gl_info->limits.glsl_ps_float_constants; ++i)
4630 {
4631 snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i);
4632 entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4633 }
4634 for (i = 0; i < MAX_CONST_I; ++i)
4635 {
4636 snprintf(glsl_name, sizeof(glsl_name), "PI[%i]", i);
4637 entry->puniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4638 }
4639
4640 if(pshader) {
4641 char name[32];
4642
4643 for(i = 0; i < MAX_TEXTURES; i++) {
4644 sprintf(name, "bumpenvmat%u", i);
4645 entry->bumpenvmat_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4646 sprintf(name, "luminancescale%u", i);
4647 entry->luminancescale_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4648 sprintf(name, "luminanceoffset%u", i);
4649 entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4650 }
4651
4652 if (ps_compile_args.np2_fixup) {
4653 if (WINEFIXUPINFO_ISVALID(entry)) {
4654 entry->np2Fixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "PsamplerNP2Fixup"));
4655 } else {
4656 FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup uniform found.\n");
4657 }
4658 }
4659 }
4660
4661 entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
4662 entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));
4663 checkGLcall("Find glsl program uniform locations");
4664
4665 if (pshader
4666 && ((IWineD3DPixelShaderImpl *)pshader)->baseShader.reg_maps.shader_version.major >= 3
4667 && ((IWineD3DPixelShaderImpl *)pshader)->declared_in_count > vec4_varyings(3, gl_info))
4668 {
4669 TRACE("Shader %d needs vertex color clamping disabled\n", programId);
4670 entry->vertex_color_clamp = GL_FALSE;
4671 } else {
4672 entry->vertex_color_clamp = GL_FIXED_ONLY_ARB;
4673 }
4674
4675 /* Set the shader to allow uniform loading on it */
4676 GL_EXTCALL(glUseProgramObjectARB(programId));
4677 checkGLcall("glUseProgramObjectARB(programId)");
4678
4679#ifdef DEBUG_misha
4680 {
4681 GLint programIdTest = -1;
4682 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
4683 Assert(programIdTest == programId);
4684 }
4685#endif
4686
4687 /* Load the vertex and pixel samplers now. The function that finds the mappings makes sure
4688 * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If
4689 * a pshader with fixed function pipeline is used there are no vertex samplers, and if a
4690 * vertex shader with fixed function pixel processing is used we make sure that the card
4691 * supports enough samplers to allow the max number of vertex samplers with all possible
4692 * fixed function fragment processing setups. So once the program is linked these samplers
4693 * won't change.
4694 */
4695 if (vshader) shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId);
4696 if (pshader) shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId);
4697
4698 /* If the local constants do not have to be loaded with the environment constants,
4699 * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
4700 * later
4701 */
4702 if (pshader && !((IWineD3DBaseShaderImpl *)pshader)->baseShader.load_local_constsF)
4703 {
4704 hardcode_local_constants((IWineD3DBaseShaderImpl *) pshader, gl_info, programId, 'P');
4705 }
4706 if (vshader && !((IWineD3DBaseShaderImpl *)vshader)->baseShader.load_local_constsF)
4707 {
4708 hardcode_local_constants((IWineD3DBaseShaderImpl *) vshader, gl_info, programId, 'V');
4709 }
4710}
4711
4712/* GL locking is done by the caller */
4713static GLhandleARB create_glsl_blt_shader(const struct wined3d_gl_info *gl_info, enum tex_types tex_type)
4714{
4715 GLhandleARB program_id;
4716 GLhandleARB vshader_id, pshader_id;
4717 static const char *blt_vshader[] =
4718 {
4719 "#version 120\n"
4720 "void main(void)\n"
4721 "{\n"
4722 " gl_Position = gl_Vertex;\n"
4723 " gl_FrontColor = vec4(1.0);\n"
4724 " gl_TexCoord[0] = gl_MultiTexCoord0;\n"
4725 "}\n"
4726 };
4727
4728 static const char *blt_pshaders[tex_type_count] =
4729 {
4730 /* tex_1d */
4731 NULL,
4732 /* tex_2d */
4733 "#version 120\n"
4734 "uniform sampler2D sampler;\n"
4735 "void main(void)\n"
4736 "{\n"
4737 " gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
4738 "}\n",
4739 /* tex_3d */
4740 NULL,
4741 /* tex_cube */
4742 "#version 120\n"
4743 "uniform samplerCube sampler;\n"
4744 "void main(void)\n"
4745 "{\n"
4746 " gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
4747 "}\n",
4748 /* tex_rect */
4749 "#version 120\n"
4750 "#extension GL_ARB_texture_rectangle : enable\n"
4751 "uniform sampler2DRect sampler;\n"
4752 "void main(void)\n"
4753 "{\n"
4754 " gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
4755 "}\n",
4756 };
4757
4758 if (!blt_pshaders[tex_type])
4759 {
4760 FIXME("tex_type %#x not supported\n", tex_type);
4761 tex_type = tex_2d;
4762 }
4763
4764 vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4765 GL_EXTCALL(glShaderSourceARB(vshader_id, 1, blt_vshader, NULL));
4766 GL_EXTCALL(glCompileShaderARB(vshader_id));
4767 shader_glsl_validate_compile_link(gl_info, vshader_id, FALSE);
4768
4769 pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
4770 GL_EXTCALL(glShaderSourceARB(pshader_id, 1, &blt_pshaders[tex_type], NULL));
4771 GL_EXTCALL(glCompileShaderARB(pshader_id));
4772
4773 shader_glsl_validate_compile_link(gl_info, vshader_id, FALSE);
4774
4775 program_id = GL_EXTCALL(glCreateProgramObjectARB());
4776 GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
4777 GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
4778 GL_EXTCALL(glLinkProgramARB(program_id));
4779 shader_glsl_validate_compile_link(gl_info, program_id, TRUE);
4780
4781 /* Once linked we can mark the shaders for deletion. They will be deleted once the program
4782 * is destroyed
4783 */
4784 GL_EXTCALL(glDeleteObjectARB(vshader_id));
4785 GL_EXTCALL(glDeleteObjectARB(pshader_id));
4786 return program_id;
4787}
4788
4789/* GL locking is done by the caller */
4790static void shader_glsl_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS)
4791{
4792 const struct wined3d_gl_info *gl_info = context->gl_info;
4793 IWineD3DDeviceImpl *device = context_get_device(context);
4794 struct shader_glsl_priv *priv = device->shader_priv;
4795 GLhandleARB program_id = 0;
4796 GLenum old_vertex_color_clamp, current_vertex_color_clamp;
4797
4798 old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
4799
4800 if (useVS || usePS) set_glsl_shader_program(context, device, usePS, useVS);
4801 else priv->glsl_program = NULL;
4802
4803 current_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
4804
4805 if (old_vertex_color_clamp != current_vertex_color_clamp)
4806 {
4807 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
4808 {
4809 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp));
4810 checkGLcall("glClampColorARB");
4811 }
4812 else
4813 {
4814 FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
4815 }
4816 }
4817
4818 program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
4819 if (program_id) TRACE("Using GLSL program %u\n", program_id);
4820 GL_EXTCALL(glUseProgramObjectARB(program_id));
4821 checkGLcall("glUseProgramObjectARB");
4822#ifdef DEBUG_misha
4823 {
4824 GLint programIdTest = -1;
4825 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
4826 Assert(programIdTest == program_id);
4827 }
4828#endif
4829
4830 /* In case that NP2 texcoord fixup data is found for the selected program, trigger a reload of the
4831 * constants. This has to be done because it can't be guaranteed that sampler() (from state.c) is
4832 * called between selecting the shader and using it, which results in wrong fixup for some frames. */
4833 if (priv->glsl_program && WINEFIXUPINFO_ISVALID(priv->glsl_program))
4834 {
4835 shader_glsl_load_np2fixup_constants((IWineD3DDevice *)device, usePS, useVS);
4836 }
4837}
4838
4839/* GL locking is done by the caller */
4840static void shader_glsl_select_depth_blt(IWineD3DDevice *iface, enum tex_types tex_type) {
4841 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
4842 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
4843 struct shader_glsl_priv *priv = This->shader_priv;
4844 GLhandleARB *blt_program = &priv->depth_blt_program[tex_type];
4845
4846 if (!*blt_program) {
4847 GLint loc;
4848 *blt_program = create_glsl_blt_shader(gl_info, tex_type);
4849 loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler"));
4850 GL_EXTCALL(glUseProgramObjectARB(*blt_program));
4851#ifdef DEBUG_misha
4852 {
4853 GLint programIdTest = -1;
4854 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
4855 Assert(programIdTest == *blt_program);
4856 }
4857#endif
4858 GL_EXTCALL(glUniform1iARB(loc, 0));
4859 } else {
4860 GL_EXTCALL(glUseProgramObjectARB(*blt_program));
4861#ifdef DEBUG_misha
4862 {
4863 GLint programIdTest = -1;
4864 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
4865 Assert(programIdTest == *blt_program);
4866 }
4867#endif
4868 }
4869}
4870
4871/* GL locking is done by the caller */
4872static void shader_glsl_deselect_depth_blt(IWineD3DDevice *iface) {
4873 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
4874 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
4875 struct shader_glsl_priv *priv = This->shader_priv;
4876 GLhandleARB program_id;
4877
4878 program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
4879 if (program_id) TRACE("Using GLSL program %u\n", program_id);
4880
4881 GL_EXTCALL(glUseProgramObjectARB(program_id));
4882 checkGLcall("glUseProgramObjectARB");
4883#ifdef DEBUG_misha
4884 {
4885 GLint programIdTest = -1;
4886 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
4887 Assert(programIdTest == program_id);
4888 }
4889#endif
4890}
4891
4892static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
4893 const struct list *linked_programs;
4894 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *) iface;
4895 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device;
4896 struct shader_glsl_priv *priv = device->shader_priv;
4897 const struct wined3d_gl_info *gl_info;
4898 struct wined3d_context *context;
4899
4900 /* Note: Do not use QueryInterface here to find out which shader type this is because this code
4901 * can be called from IWineD3DBaseShader::Release
4902 */
4903 char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
4904
4905 if(pshader) {
4906 struct glsl_pshader_private *shader_data;
4907 shader_data = This->baseShader.backend_data;
4908 if(!shader_data || shader_data->num_gl_shaders == 0)
4909 {
4910 HeapFree(GetProcessHeap(), 0, shader_data);
4911 This->baseShader.backend_data = NULL;
4912 return;
4913 }
4914
4915 context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD);
4916 gl_info = context->gl_info;
4917
4918 if (priv->glsl_program && (IWineD3DBaseShader *)priv->glsl_program->pshader == iface)
4919 {
4920 ENTER_GL();
4921 shader_glsl_select(context, FALSE, FALSE);
4922 LEAVE_GL();
4923 }
4924 } else {
4925 struct glsl_vshader_private *shader_data;
4926 shader_data = This->baseShader.backend_data;
4927 if(!shader_data || shader_data->num_gl_shaders == 0)
4928 {
4929 HeapFree(GetProcessHeap(), 0, shader_data);
4930 This->baseShader.backend_data = NULL;
4931 return;
4932 }
4933
4934 context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD);
4935 gl_info = context->gl_info;
4936
4937 if (priv->glsl_program && (IWineD3DBaseShader *)priv->glsl_program->vshader == iface)
4938 {
4939 ENTER_GL();
4940 shader_glsl_select(context, FALSE, FALSE);
4941 LEAVE_GL();
4942 }
4943 }
4944
4945 linked_programs = &This->baseShader.linked_programs;
4946
4947 TRACE("Deleting linked programs\n");
4948 if (linked_programs->next) {
4949 struct glsl_shader_prog_link *entry, *entry2;
4950
4951 ENTER_GL();
4952 if(pshader) {
4953 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, pshader_entry) {
4954 delete_glsl_program_entry(priv, gl_info, entry);
4955 }
4956 } else {
4957 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
4958 delete_glsl_program_entry(priv, gl_info, entry);
4959 }
4960 }
4961 LEAVE_GL();
4962 }
4963
4964 if(pshader) {
4965 UINT i;
4966 struct glsl_pshader_private *shader_data = This->baseShader.backend_data;
4967
4968 ENTER_GL();
4969 for(i = 0; i < shader_data->num_gl_shaders; i++) {
4970 if (shader_data->gl_shaders[i].context==context_get_current())
4971 {
4972 TRACE("deleting pshader %u\n", shader_data->gl_shaders[i].prgId);
4973 GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
4974 checkGLcall("glDeleteObjectARB");
4975 }
4976 else
4977 {
4978 WARN("Attempting to delete pshader %u created in ctx %p from ctx %p\n",
4979 shader_data->gl_shaders[i].prgId, shader_data->gl_shaders[i].context, context_get_current());
4980 }
4981 }
4982 LEAVE_GL();
4983 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
4984 }
4985 else
4986 {
4987 UINT i;
4988 struct glsl_vshader_private *shader_data = This->baseShader.backend_data;
4989
4990 ENTER_GL();
4991 for(i = 0; i < shader_data->num_gl_shaders; i++) {
4992 if (shader_data->gl_shaders[i].context==context_get_current())
4993 {
4994 TRACE("deleting vshader %u\n", shader_data->gl_shaders[i].prgId);
4995 GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
4996 checkGLcall("glDeleteObjectARB");
4997 }
4998 else
4999 {
5000 WARN("Attempting to delete vshader %u created in ctx %p from ctx %p\n",
5001 shader_data->gl_shaders[i].prgId, shader_data->gl_shaders[i].context, context_get_current());
5002 }
5003 }
5004 LEAVE_GL();
5005 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
5006 }
5007
5008 HeapFree(GetProcessHeap(), 0, This->baseShader.backend_data);
5009 This->baseShader.backend_data = NULL;
5010
5011 context_release(context);
5012}
5013
5014static int glsl_program_key_compare(const void *key, const struct wine_rb_entry *entry)
5015{
5016 const glsl_program_key_t *k = key;
5017 const struct glsl_shader_prog_link *prog = WINE_RB_ENTRY_VALUE(entry,
5018 const struct glsl_shader_prog_link, program_lookup_entry);
5019 int cmp;
5020
5021 if (k->context > prog->context) return 1;
5022 else if (k->context < prog->context) return -1;
5023
5024 if (k->vshader > prog->vshader) return 1;
5025 else if (k->vshader < prog->vshader) return -1;
5026
5027 if (k->pshader > prog->pshader) return 1;
5028 else if (k->pshader < prog->pshader) return -1;
5029
5030 if (k->vshader && (cmp = memcmp(&k->vs_args, &prog->vs_args, sizeof(prog->vs_args)))) return cmp;
5031 if (k->pshader && (cmp = memcmp(&k->ps_args, &prog->ps_args, sizeof(prog->ps_args)))) return cmp;
5032
5033 return 0;
5034}
5035
5036static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count)
5037{
5038 SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
5039 void *mem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
5040
5041 if (!mem)
5042 {
5043 ERR("Failed to allocate memory\n");
5044 return FALSE;
5045 }
5046
5047 heap->entries = mem;
5048 heap->entries[1].version = 0;
5049 heap->positions = (unsigned int *)(heap->entries + constant_count + 1);
5050 heap->size = 1;
5051
5052 return TRUE;
5053}
5054
5055static void constant_heap_free(struct constant_heap *heap)
5056{
5057 HeapFree(GetProcessHeap(), 0, heap->entries);
5058}
5059
5060static const struct wine_rb_functions wined3d_glsl_program_rb_functions =
5061{
5062 wined3d_rb_alloc,
5063 wined3d_rb_realloc,
5064 wined3d_rb_free,
5065 glsl_program_key_compare,
5066};
5067
5068static HRESULT shader_glsl_alloc(IWineD3DDevice *iface) {
5069 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5070 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5071 struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv));
5072 SIZE_T stack_size = wined3d_log2i(max(gl_info->limits.glsl_vs_float_constants,
5073 gl_info->limits.glsl_ps_float_constants)) + 1;
5074
5075 if (!shader_buffer_init(&priv->shader_buffer))
5076 {
5077 ERR("Failed to initialize shader buffer.\n");
5078 goto fail;
5079 }
5080
5081 priv->stack = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, stack_size * sizeof(*priv->stack));
5082 if (!priv->stack)
5083 {
5084 ERR("Failed to allocate memory.\n");
5085 goto fail;
5086 }
5087 if (!constant_heap_init(&priv->vconst_heap, gl_info->limits.glsl_vs_float_constants))
5088 {
5089 ERR("Failed to initialize vertex shader constant heap\n");
5090 goto fail;
5091 }
5092 if (!constant_heap_init(&priv->pconst_heap, gl_info->limits.glsl_ps_float_constants))
5093 {
5094 ERR("Failed to initialize pixel shader constant heap\n");
5095 goto fail;
5096 }
5097
5098 if (wine_rb_init(&priv->program_lookup, &wined3d_glsl_program_rb_functions) == -1)
5099 {
5100 ERR("Failed to initialize rbtree.\n");
5101 goto fail;
5102 }
5103
5104 priv->next_constant_version = 1;
5105
5106 This->shader_priv = priv;
5107 return WINED3D_OK;
5108
5109fail:
5110 constant_heap_free(&priv->pconst_heap);
5111 constant_heap_free(&priv->vconst_heap);
5112 HeapFree(GetProcessHeap(), 0, priv->stack);
5113 shader_buffer_free(&priv->shader_buffer);
5114 HeapFree(GetProcessHeap(), 0, priv);
5115 return E_OUTOFMEMORY;
5116}
5117
5118/* Context activation is done by the caller. */
5119static void shader_glsl_free(IWineD3DDevice *iface) {
5120 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5121 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5122 struct shader_glsl_priv *priv = This->shader_priv;
5123 int i;
5124
5125 ENTER_GL();
5126 for (i = 0; i < tex_type_count; ++i)
5127 {
5128 if (priv->depth_blt_program[i])
5129 {
5130 GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program[i]));
5131 }
5132 }
5133 LEAVE_GL();
5134
5135 wine_rb_destroy(&priv->program_lookup, NULL, NULL);
5136 constant_heap_free(&priv->pconst_heap);
5137 constant_heap_free(&priv->vconst_heap);
5138 HeapFree(GetProcessHeap(), 0, priv->stack);
5139 shader_buffer_free(&priv->shader_buffer);
5140
5141 HeapFree(GetProcessHeap(), 0, This->shader_priv);
5142 This->shader_priv = NULL;
5143}
5144
5145static BOOL shader_glsl_dirty_const(IWineD3DDevice *iface) {
5146 /* TODO: GL_EXT_bindable_uniform can be used to share constants across shaders */
5147 return FALSE;
5148}
5149
5150static void shader_glsl_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *pCaps)
5151{
5152 /* Nvidia Geforce6/7 or Ati R4xx/R5xx cards with GLSL support, support VS 3.0 but older Nvidia/Ati
5153 * models with GLSL support only support 2.0. In case of nvidia we can detect VS 2.0 support based
5154 * on the version of NV_vertex_program.
5155 * For Ati cards there's no way using glsl (it abstracts the lowlevel info away) and also not
5156 * using ARB_vertex_program. It is safe to assume that when a card supports pixel shader 2.0 it
5157 * supports vertex shader 2.0 too and the way around. We can detect ps2.0 using the maximum number
5158 * of native instructions, so use that here. For more info see the pixel shader versioning code below.
5159 */
5160 if ((gl_info->supported[NV_VERTEX_PROGRAM2] && !gl_info->supported[NV_VERTEX_PROGRAM3])
5161 || gl_info->limits.arb_ps_instructions <= 512
5162 || gl_info->limits.glsl_vs_float_constants < 256)
5163 pCaps->VertexShaderVersion = WINED3DVS_VERSION(2,0);
5164 else
5165 pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0);
5166 TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff);
5167 pCaps->MaxVertexShaderConst = gl_info->limits.glsl_vs_float_constants;
5168
5169 /* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b.
5170 * In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed
5171 * to distinguish between 2.0 and 3.0 (and 2.0a/2.0b). In case of Nvidia we use their fragment
5172 * program extensions. On other hardware including ATI GL_ARB_fragment_program offers the info
5173 * in max native instructions. Intel and others also offer the info in this extension but they
5174 * don't support GLSL (at least on Windows).
5175 *
5176 * PS2.0 requires at least 96 instructions, 2.0a/2.0b go up to 512. Assume that if the number
5177 * of instructions is 512 or less we have to do with ps2.0 hardware.
5178 * NOTE: ps3.0 hardware requires 512 or more instructions but ati and nvidia offer 'enough' (1024 vs 4096) on their most basic ps3.0 hardware.
5179 */
5180 if ((gl_info->supported[NV_FRAGMENT_PROGRAM] && !gl_info->supported[NV_FRAGMENT_PROGRAM2])
5181 || gl_info->limits.arb_ps_instructions <= 512
5182 || gl_info->limits.glsl_vs_float_constants < 256)
5183 pCaps->PixelShaderVersion = WINED3DPS_VERSION(2,0);
5184 else
5185 pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0);
5186
5187 pCaps->MaxPixelShaderConst = gl_info->limits.glsl_ps_float_constants;
5188
5189 /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
5190 * Direct3D minimum requirement.
5191 *
5192 * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude"
5193 * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here?
5194 *
5195 * The problem is that the refrast clamps temporary results in the shader to
5196 * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here,
5197 * then applications may miss the clamping behavior. On the other hand, if it is smaller,
5198 * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't
5199 * offer a way to query this.
5200 */
5201 pCaps->PixelShader1xMaxValue = 8.0;
5202 TRACE_(d3d_caps)("Hardware pixel shader version %d.%d enabled (GLSL)\n", (pCaps->PixelShaderVersion >> 8) & 0xff, pCaps->PixelShaderVersion & 0xff);
5203
5204 pCaps->VSClipping = TRUE;
5205}
5206
5207static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup)
5208{
5209 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
5210 {
5211 TRACE("Checking support for fixup:\n");
5212 dump_color_fixup_desc(fixup);
5213 }
5214
5215 /* We support everything except YUV conversions. */
5216 if (!is_complex_fixup(fixup))
5217 {
5218 TRACE("[OK]\n");
5219 return TRUE;
5220 }
5221
5222 TRACE("[FAILED]\n");
5223 return FALSE;
5224}
5225
5226static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
5227{
5228 /* WINED3DSIH_ABS */ shader_glsl_map2gl,
5229 /* WINED3DSIH_ADD */ shader_glsl_arith,
5230 /* WINED3DSIH_BEM */ shader_glsl_bem,
5231 /* WINED3DSIH_BREAK */ shader_glsl_break,
5232 /* WINED3DSIH_BREAKC */ shader_glsl_breakc,
5233 /* WINED3DSIH_BREAKP */ NULL,
5234 /* WINED3DSIH_CALL */ shader_glsl_call,
5235 /* WINED3DSIH_CALLNZ */ shader_glsl_callnz,
5236 /* WINED3DSIH_CMP */ shader_glsl_cmp,
5237 /* WINED3DSIH_CND */ shader_glsl_cnd,
5238 /* WINED3DSIH_CRS */ shader_glsl_cross,
5239 /* WINED3DSIH_CUT */ NULL,
5240 /* WINED3DSIH_DCL */ NULL,
5241 /* WINED3DSIH_DEF */ NULL,
5242 /* WINED3DSIH_DEFB */ NULL,
5243 /* WINED3DSIH_DEFI */ NULL,
5244 /* WINED3DSIH_DP2ADD */ shader_glsl_dp2add,
5245 /* WINED3DSIH_DP3 */ shader_glsl_dot,
5246 /* WINED3DSIH_DP4 */ shader_glsl_dot,
5247 /* WINED3DSIH_DST */ shader_glsl_dst,
5248 /* WINED3DSIH_DSX */ shader_glsl_map2gl,
5249 /* WINED3DSIH_DSY */ shader_glsl_map2gl,
5250 /* WINED3DSIH_ELSE */ shader_glsl_else,
5251 /* WINED3DSIH_EMIT */ NULL,
5252 /* WINED3DSIH_ENDIF */ shader_glsl_end,
5253 /* WINED3DSIH_ENDLOOP */ shader_glsl_end,
5254 /* WINED3DSIH_ENDREP */ shader_glsl_end,
5255 /* WINED3DSIH_EXP */ shader_glsl_map2gl,
5256 /* WINED3DSIH_EXPP */ shader_glsl_expp,
5257 /* WINED3DSIH_FRC */ shader_glsl_map2gl,
5258 /* WINED3DSIH_IADD */ NULL,
5259 /* WINED3DSIH_IF */ shader_glsl_if,
5260 /* WINED3DSIH_IFC */ shader_glsl_ifc,
5261 /* WINED3DSIH_IGE */ NULL,
5262 /* WINED3DSIH_LABEL */ shader_glsl_label,
5263 /* WINED3DSIH_LIT */ shader_glsl_lit,
5264 /* WINED3DSIH_LOG */ shader_glsl_log,
5265 /* WINED3DSIH_LOGP */ shader_glsl_log,
5266 /* WINED3DSIH_LOOP */ shader_glsl_loop,
5267 /* WINED3DSIH_LRP */ shader_glsl_lrp,
5268 /* WINED3DSIH_LT */ NULL,
5269 /* WINED3DSIH_M3x2 */ shader_glsl_mnxn,
5270 /* WINED3DSIH_M3x3 */ shader_glsl_mnxn,
5271 /* WINED3DSIH_M3x4 */ shader_glsl_mnxn,
5272 /* WINED3DSIH_M4x3 */ shader_glsl_mnxn,
5273 /* WINED3DSIH_M4x4 */ shader_glsl_mnxn,
5274 /* WINED3DSIH_MAD */ shader_glsl_mad,
5275 /* WINED3DSIH_MAX */ shader_glsl_map2gl,
5276 /* WINED3DSIH_MIN */ shader_glsl_map2gl,
5277 /* WINED3DSIH_MOV */ shader_glsl_mov,
5278 /* WINED3DSIH_MOVA */ shader_glsl_mov,
5279 /* WINED3DSIH_MUL */ shader_glsl_arith,
5280 /* WINED3DSIH_NOP */ NULL,
5281 /* WINED3DSIH_NRM */ shader_glsl_nrm,
5282 /* WINED3DSIH_PHASE */ NULL,
5283 /* WINED3DSIH_POW */ shader_glsl_pow,
5284 /* WINED3DSIH_RCP */ shader_glsl_rcp,
5285 /* WINED3DSIH_REP */ shader_glsl_rep,
5286 /* WINED3DSIH_RET */ shader_glsl_ret,
5287 /* WINED3DSIH_RSQ */ shader_glsl_rsq,
5288 /* WINED3DSIH_SETP */ NULL,
5289 /* WINED3DSIH_SGE */ shader_glsl_compare,
5290 /* WINED3DSIH_SGN */ shader_glsl_sgn,
5291 /* WINED3DSIH_SINCOS */ shader_glsl_sincos,
5292 /* WINED3DSIH_SLT */ shader_glsl_compare,
5293 /* WINED3DSIH_SUB */ shader_glsl_arith,
5294 /* WINED3DSIH_TEX */ shader_glsl_tex,
5295 /* WINED3DSIH_TEXBEM */ shader_glsl_texbem,
5296 /* WINED3DSIH_TEXBEML */ shader_glsl_texbem,
5297 /* WINED3DSIH_TEXCOORD */ shader_glsl_texcoord,
5298 /* WINED3DSIH_TEXDEPTH */ shader_glsl_texdepth,
5299 /* WINED3DSIH_TEXDP3 */ shader_glsl_texdp3,
5300 /* WINED3DSIH_TEXDP3TEX */ shader_glsl_texdp3tex,
5301 /* WINED3DSIH_TEXKILL */ shader_glsl_texkill,
5302 /* WINED3DSIH_TEXLDD */ shader_glsl_texldd,
5303 /* WINED3DSIH_TEXLDL */ shader_glsl_texldl,
5304 /* WINED3DSIH_TEXM3x2DEPTH */ shader_glsl_texm3x2depth,
5305 /* WINED3DSIH_TEXM3x2PAD */ shader_glsl_texm3x2pad,
5306 /* WINED3DSIH_TEXM3x2TEX */ shader_glsl_texm3x2tex,
5307 /* WINED3DSIH_TEXM3x3 */ shader_glsl_texm3x3,
5308 /* WINED3DSIH_TEXM3x3DIFF */ NULL,
5309 /* WINED3DSIH_TEXM3x3PAD */ shader_glsl_texm3x3pad,
5310 /* WINED3DSIH_TEXM3x3SPEC */ shader_glsl_texm3x3spec,
5311 /* WINED3DSIH_TEXM3x3TEX */ shader_glsl_texm3x3tex,
5312 /* WINED3DSIH_TEXM3x3VSPEC */ shader_glsl_texm3x3vspec,
5313 /* WINED3DSIH_TEXREG2AR */ shader_glsl_texreg2ar,
5314 /* WINED3DSIH_TEXREG2GB */ shader_glsl_texreg2gb,
5315 /* WINED3DSIH_TEXREG2RGB */ shader_glsl_texreg2rgb,
5316};
5317
5318static void shader_glsl_handle_instruction(const struct wined3d_shader_instruction *ins) {
5319 SHADER_HANDLER hw_fct;
5320
5321 /* Select handler */
5322 hw_fct = shader_glsl_instruction_handler_table[ins->handler_idx];
5323
5324 /* Unhandled opcode */
5325 if (!hw_fct)
5326 {
5327 FIXME("Backend can't handle opcode %#x\n", ins->handler_idx);
5328 return;
5329 }
5330 hw_fct(ins);
5331
5332 shader_glsl_add_instruction_modifiers(ins);
5333}
5334
5335const shader_backend_t glsl_shader_backend = {
5336 shader_glsl_handle_instruction,
5337 shader_glsl_select,
5338 shader_glsl_select_depth_blt,
5339 shader_glsl_deselect_depth_blt,
5340 shader_glsl_update_float_vertex_constants,
5341 shader_glsl_update_float_pixel_constants,
5342 shader_glsl_load_constants,
5343 shader_glsl_load_np2fixup_constants,
5344 shader_glsl_destroy,
5345 shader_glsl_alloc,
5346 shader_glsl_free,
5347 shader_glsl_dirty_const,
5348 shader_glsl_get_caps,
5349 shader_glsl_color_fixup_supported,
5350};
5351
#if defined(VBOXWINEDBG_SHADERS) || defined(VBOX_WINE_WITH_PROFILE)
/*
 * printf-style debug output: formats the message into a fixed 8 KiB stack
 * buffer and passes it to OutputDebugStringA(). Messages that do not fit
 * are truncated.
 *
 * szString: printf-style format string, followed by its arguments.
 */
void vboxWDbgPrintF(char * szString, ...)
{
    char szBuffer[4096*2] = {0};
    va_list pArgList;

    va_start(pArgList, szString);
    /* _vsnprintf() does not write a terminating NUL when the output fills
     * the given count, so the last byte is kept out of its reach. */
    _vsnprintf(szBuffer, sizeof(szBuffer) / sizeof(szBuffer[0]) - 1, szString, pArgList);
    va_end(pArgList);

    /* Guarantee termination regardless of what the formatter wrote. */
    szBuffer[sizeof(szBuffer) / sizeof(szBuffer[0]) - 1] = '\0';

    OutputDebugStringA(szBuffer);
}
#endif
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette