VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/shaderlib/glsl_shader.c@ 104027

Last change on this file since 104027 was 103468, checked in by vboxsync, 10 months ago

Devices/Graphics/shaderlib: Need to zero out two structs, bugref:3409

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 221.2 KB
Line 
1/*
2 * GLSL pixel and vertex shader implementation
3 *
4 * Copyright 2006 Jason Green
5 * Copyright 2006-2007 Henri Verbeet
6 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
7 * Copyright 2009 Henri Verbeet for CodeWeavers
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 */
23
24/*
25 * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
26 * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
27 * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
28 * a choice of LGPL license versions is made available with the language indicating
29 * that LGPLv2 or any later version may be used, or where a choice of which version
30 * of the LGPL is applied is otherwise unspecified.
31 */
32
33/*
34 * D3D shader asm has swizzles on source parameters, and write masks for
35 * destination parameters. GLSL uses swizzles for both. The result of this is
36 * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
37 * Ie, to generate a proper GLSL source swizzle, we need to take the D3D write
38 * mask for the destination parameter into account.
39 */
40
41#include "config.h"
42#include "wine/port.h"
43#include <limits.h>
44#include <stdio.h>
45#include "wined3d_private.h"
46
47WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
48WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
49WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
50WINE_DECLARE_DEBUG_CHANNEL(d3d);
51
52#ifdef VBOX
53# include <iprt/string.h>
54#endif
55
56#ifdef VBOX_WITH_VMSVGA
57#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
58#include <VBox/log.h>
59#undef WDLOG
60#define WDLOG(_m) Log(_m)
61#undef CONST
62#define CONST const
63#endif
64
65#define GLINFO_LOCATION (*gl_info)
66
67#define WINED3D_GLSL_SAMPLE_PROJECTED 0x1
68#define WINED3D_GLSL_SAMPLE_RECT 0x2
69#define WINED3D_GLSL_SAMPLE_LOD 0x4
70#define WINED3D_GLSL_SAMPLE_GRAD 0x8
71
/* Textual form of a destination parameter: a register name buffer and a
 * mask string buffer (presumably the GLSL write-mask suffix). */
typedef struct
{
    char reg_name[150];     /* register name text */
    char mask_str[6];       /* mask text; six bytes including the terminator */
} glsl_dst_param_t;
76
/* Textual form of a source parameter: a register name buffer and a second,
 * larger buffer (presumably the full expression including modifiers and
 * swizzle). */
typedef struct
{
    char reg_name[150];     /* register name text */
    char param_str[200];    /* complete parameter text */
} glsl_src_param_t;
81
82typedef struct {
83 const char *name;
84 DWORD coord_mask;
85} glsl_sample_function_t;
86
/* Per-level state stored on the traversal stack used by the constant heap
 * walkers: what to try next for the node at that level. */
enum heap_node_op
{
    HEAP_NODE_TRAVERSE_LEFT,    /* try to descend into the left child */
    HEAP_NODE_TRAVERSE_RIGHT,   /* left is done, try the right child */
    HEAP_NODE_POP,              /* both children done, return to the parent */
};
93
/* One element of a constant heap: a constant index and the version it was
 * last updated at. */
struct constant_entry
{
    unsigned int idx;       /* index of the constant */
    unsigned int version;   /* version stamp used as the heap key */
};
99
/* Binary heap of constants keyed by version. The walkers start at entry 1
 * and visit children 2*i and 2*i+1, so the root lives at index 1. */
struct constant_heap
{
    struct constant_entry *entries;     /* heap storage, root at index 1 */
    unsigned int *positions;            /* constant index -> index into entries */
    unsigned int size;                  /* exclusive upper bound for valid entry indices */
};
106
107/* GLSL shader private data */
108struct shader_glsl_priv {
109 struct wined3d_shader_buffer shader_buffer;
110 struct wine_rb_tree program_lookup;
111 struct glsl_shader_prog_link *glsl_program;
112 struct constant_heap vconst_heap;
113 struct constant_heap pconst_heap;
114 unsigned char *stack;
115 GLhandleARB depth_blt_program[tex_type_count];
116 UINT next_constant_version;
117};
118
119/* Struct to maintain data about a linked GLSL program */
120struct glsl_shader_prog_link {
121 struct wine_rb_entry program_lookup_entry;
122 struct list vshader_entry;
123 struct list pshader_entry;
124 GLhandleARB programId;
125 GLint *vuniformF_locations;
126 GLint *puniformF_locations;
127 GLint vuniformI_locations[MAX_CONST_I];
128 GLint puniformI_locations[MAX_CONST_I];
129 GLint posFixup_location;
130 GLint np2Fixup_location;
131 GLint bumpenvmat_location[MAX_TEXTURES];
132 GLint luminancescale_location[MAX_TEXTURES];
133 GLint luminanceoffset_location[MAX_TEXTURES];
134 GLint ycorrection_location;
135 GLenum vertex_color_clamp;
136 IWineD3DVertexShader *vshader;
137 IWineD3DPixelShader *pshader;
138 struct vs_compile_args vs_args;
139 struct ps_compile_args ps_args;
140 UINT constant_version;
141 const struct wined3d_context *context;
142 UINT inp2Fixup_info;
143};
144
/*
 * Accessors for glsl_shader_prog_link::inp2Fixup_info.
 *
 * The field is a UINT, so the "no index" marker must be an unsigned int
 * constant: (~0UL) is 64 bits wide on LP64 targets and would never compare
 * equal to the field. WINEFIXUPINFO_INIT has to assign the marker; the former
 * non-VMSVGA variant compared instead and therefore initialised nothing.
 * Both definitions are now the same for every build configuration.
 */
#define WINEFIXUPINFO_NOINDEX (~0U)
/* Resolves the index to the pixel shader's np2 fixup info; may yield NULL. */
#define WINEFIXUPINFO_GET(_p) get_fixup_info((const IWineD3DPixelShaderImpl*)(_p)->pshader, (_p)->inp2Fixup_info)
/* Non-zero when an index has been stored. */
#define WINEFIXUPINFO_ISVALID(_p) ((_p)->inp2Fixup_info != WINEFIXUPINFO_NOINDEX)
/* Marks the link as having no fixup info. Statement macro. */
#define WINEFIXUPINFO_INIT(_p) do { (_p)->inp2Fixup_info = WINEFIXUPINFO_NOINDEX; } while (0)
157
158typedef struct {
159 IWineD3DVertexShader *vshader;
160 IWineD3DPixelShader *pshader;
161 struct ps_compile_args ps_args;
162 struct vs_compile_args vs_args;
163 const struct wined3d_context *context;
164} glsl_program_key_t;
165
/* Pointers to the compile arguments and NP2 fixup info in effect
 * (presumably while a shader is being generated). */
struct shader_glsl_ctx_priv
{
    const struct vs_compile_args *cur_vs_args;
    const struct ps_compile_args *cur_ps_args;
    struct ps_np2fixup_info *cur_np2fixup_info;
};
171
172struct glsl_ps_compiled_shader
173{
174 struct ps_compile_args args;
175 struct ps_np2fixup_info np2fixup;
176 GLhandleARB prgId;
177 const struct wined3d_context *context;
178};
179
180struct glsl_pshader_private
181{
182 struct glsl_ps_compiled_shader *gl_shaders;
183 UINT num_gl_shaders, shader_array_size;
184};
185
186struct glsl_vs_compiled_shader
187{
188 struct vs_compile_args args;
189 GLhandleARB prgId;
190 const struct wined3d_context *context;
191};
192
193struct glsl_vshader_private
194{
195 struct glsl_vs_compiled_shader *gl_shaders;
196 UINT num_gl_shaders, shader_array_size;
197};
198
#ifdef LOG_ENABLED
/* Maps a GL shader object subtype to its symbolic name for log output.
 * Unrecognised values are formatted as "UNKNOWN(<hex>)". */
static const char *debug_gl_shader_type(GLenum type)
{
    if (type == GL_VERTEX_SHADER_ARB)
        return "GL_VERTEX_SHADER_ARB";
    if (type == GL_GEOMETRY_SHADER_ARB)
        return "GL_GEOMETRY_SHADER_ARB";
    if (type == GL_FRAGMENT_SHADER_ARB)
        return "GL_FRAGMENT_SHADER_ARB";

    return wine_dbg_sprintf("UNKNOWN(%#x)", type);
}
#endif
214
/**
 * Extracts the next line from an info log buffer.
 *
 * The buffer is modified in place: the newline that ends the returned line is
 * overwritten with a terminator, and the last byte of the remaining buffer is
 * forced to be a terminator if it is not one already.
 *
 * @param ptr     In: start of the unread part of the buffer.
 *                Out: advanced past the returned line.
 * @param pcbStr  In: number of bytes available at *ptr, terminator included.
 *                Out: number of bytes left after the returned line, 0 once
 *                the buffer is exhausted.
 * @return The line without its newline, or NULL when nothing is left.
 */
static char *get_info_log_line(char **ptr, int *pcbStr)
{
    const int cbStr = *pcbStr;
    char *line, *eol;

    /* Nothing left. A negative count is handled the same way so that the
     * termination check below can never index before the buffer. */
    if (cbStr <= 0)
    {
        *pcbStr = 0;
        return NULL;
    }

    line = *ptr;
    if (line[cbStr - 1] != '\0')
    {
        ERR("string should be null-terminated, forcing it!\n");
        line[cbStr - 1] = '\0';
    }

    if (!*line)
    {
        *pcbStr = 0;
        return NULL;
    }

    eol = strstr(line, "\n");
    if (!eol)
    {
        /* The remainder is a single line. */
        *ptr += strlen(line);
        *pcbStr = 0;
        return line;
    }

    *eol = '\0';
    /* Drop the line and its newline from the remaining byte count. */
    *pcbStr = cbStr - (((uintptr_t)eol) - ((uintptr_t)line)) - 1;
    Assert((*pcbStr) >= 0);
    Assert((*pcbStr) < cbStr);
    *ptr = eol + 1;

    return line;
}
256
257/** Prints the GLSL info log which will contain error messages if they exist */
258/* GL locking is done by the caller */
259static void print_glsl_info_log(const struct wined3d_gl_info *gl_info, GLhandleARB obj)
260{
261 int infologLength = 0;
262 char *infoLog;
263 unsigned int i;
264 BOOL is_spam;
265
266 static const char * const spam[] =
267 {
268 "Vertex shader was successfully compiled to run on hardware.\n", /* fglrx */
269 "Fragment shader was successfully compiled to run on hardware.\n", /* fglrx, with \n */
270 "Fragment shader was successfully compiled to run on hardware.", /* fglrx, no \n */
271 "Fragment shader(s) linked, vertex shader(s) linked. \n ", /* fglrx, with \n */
272 "Fragment shader(s) linked, vertex shader(s) linked.", /* fglrx, no \n */
273 "Vertex shader(s) linked, no fragment shader(s) defined. \n ", /* fglrx, with \n */
274 "Vertex shader(s) linked, no fragment shader(s) defined.", /* fglrx, no \n */
275 "Fragment shader(s) linked, no vertex shader(s) defined. \n ", /* fglrx, with \n */
276 "Fragment shader(s) linked, no vertex shader(s) defined.", /* fglrx, no \n */
277 };
278
279#ifndef VBOXWINEDBG_SHADERS
280 if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
281#endif
282
283 GL_EXTCALL(glGetObjectParameterivARB(obj,
284 GL_OBJECT_INFO_LOG_LENGTH_ARB,
285 &infologLength));
286
287 /* A size of 1 is just a null-terminated string, so the log should be bigger than
288 * that if there are errors. */
289 if (infologLength > 1)
290 {
291 char *ptr, *line;
292 int cbPtr;
293
294 /* Fglrx doesn't terminate the string properly, but it tells us the proper length.
295 * So use HEAP_ZERO_MEMORY to avoid uninitialized bytes
296 */
297 infoLog = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, infologLength);
298 GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
299 is_spam = FALSE;
300
301 for(i = 0; i < sizeof(spam) / sizeof(spam[0]); i++) {
302 if(strcmp(infoLog, spam[i]) == 0) {
303 is_spam = TRUE;
304 break;
305 }
306 }
307
308 ptr = infoLog;
309 cbPtr = infologLength;
310 if (is_spam)
311 {
312 WDLOG(("Spam received from GLSL shader #%u:\n", obj));
313 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
314 }
315 else
316 {
317 WDLOG(("Error received from GLSL shader #%u:\n", obj));
318 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
319 }
320 HeapFree(GetProcessHeap(), 0, infoLog);
321 }
322}
323
324static void shader_glsl_dump_shader_source(const struct wined3d_gl_info *gl_info, GLhandleARB shader)
325{
326 char *ptr;
327 GLint tmp, source_size;
328 char *source = NULL;
329 int cbPtr;
330
331 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_SHADER_SOURCE_LENGTH_ARB, &tmp));
332
333 source = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, tmp);
334 if (!source)
335 {
336 ERR("Failed to allocate %d bytes for shader source.\n", tmp);
337 return;
338 }
339
340 source_size = tmp;
341
342 WDLOG(("Object %u:\n", shader));
343 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_SUBTYPE_ARB, &tmp));
344 WDLOG((" GL_OBJECT_SUBTYPE_ARB: %s.\n", debug_gl_shader_type(tmp)));
345 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_COMPILE_STATUS_ARB, &tmp));
346 WDLOG((" GL_OBJECT_COMPILE_STATUS_ARB: %d.\n", tmp));
347 WDLOG(("\n"));
348
349 if (tmp == 0)
350 {
351 /* Compilation error, print the compiler's error messages. */
352 print_glsl_info_log(gl_info, shader);
353 }
354
355 ptr = source;
356 cbPtr = source_size;
357 GL_EXTCALL(glGetShaderSourceARB(shader, source_size, NULL, source));
358#if 0
359 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
360#else
361 WDLOG(("*****shader source***\n"));
362 WDLOG((" %s\n", source));
363 WDLOG(("\n*****END shader source***\n\n"));
364#endif
365 WDLOG(("\n"));
366#ifdef VBOX
367 HeapFree(GetProcessHeap(), 0, source);
368#endif
369}
370
371/* GL locking is done by the caller. */
372static void shader_glsl_dump_program_source(const struct wined3d_gl_info *gl_info, GLhandleARB program)
373{
374 GLint i, object_count;
375 GLhandleARB *objects;
376 char *source = NULL;
377
378 WDLOG(("\n***************************dumping program %d******************************\n", program));
379
380 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_ATTACHED_OBJECTS_ARB, &object_count));
381 objects = HeapAlloc(GetProcessHeap(), 0, object_count * sizeof(*objects));
382 if (!objects)
383 {
384 ERR("Failed to allocate object array memory.\n");
385 return;
386 }
387
388 GL_EXTCALL(glGetAttachedObjectsARB(program, object_count, NULL, objects));
389 for (i = 0; i < object_count; ++i)
390 {
391 shader_glsl_dump_shader_source(gl_info, objects[i]);
392 }
393
394 HeapFree(GetProcessHeap(), 0, source);
395 HeapFree(GetProcessHeap(), 0, objects);
396
397 WDLOG(("\n***************************END dumping program %d******************************\n\n", program));
398}
399
400/* GL locking is done by the caller. */
401static void shader_glsl_validate_compile_link(const struct wined3d_gl_info *gl_info, GLhandleARB program, GLboolean fIsProgram)
402{
403 GLint tmp = -1;
404
405#ifndef VBOXWINEDBG_SHADERS
406 if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
407#endif
408
409 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_TYPE_ARB, &tmp));
410 if (tmp == GL_PROGRAM_OBJECT_ARB)
411 {
412 if (!fIsProgram)
413 {
414 ERR("this is a program, but shader expected");
415 }
416 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &tmp));
417 if (!tmp)
418 {
419 ERR("Program %p link status invalid.\n", (void *)(uintptr_t)program);
420#ifndef VBOXWINEDBG_SHADERS
421 shader_glsl_dump_program_source(gl_info, program);
422#endif
423 }
424#if defined(VBOX_WITH_VMSVGA) && defined(DEBUG)
425 shader_glsl_dump_program_source(gl_info, program);
426#endif
427 }
428 else if (tmp == GL_SHADER_OBJECT_ARB)
429 {
430 if (fIsProgram)
431 {
432 ERR("this is a shader, but program expected");
433 }
434
435 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_COMPILE_STATUS_ARB, &tmp));
436 if (!tmp)
437 {
438 ERR("Shader %p compile status invalid.\n", (void *)(uintptr_t)program);
439 shader_glsl_dump_shader_source(gl_info, program);
440 }
441 }
442 else
443 {
444 ERR("unexpected oject type(%d)!", tmp);
445 }
446
447 print_glsl_info_log(gl_info, program);
448}
449
450/**
451 * Loads (pixel shader) samplers
452 */
453/* GL locking is done by the caller */
454static void shader_glsl_load_psamplers(const struct wined3d_gl_info *gl_info,
455 DWORD *tex_unit_map, GLhandleARB programId)
456{
457 GLint name_loc;
458 int i;
459 char sampler_name[20];
460
461 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
462 snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i);
463 name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
464 if (name_loc != -1) {
465 DWORD mapped_unit = tex_unit_map[i];
466 if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.fragment_samplers)
467 {
468 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
469 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
470 checkGLcall("glUniform1iARB");
471 } else {
472 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
473 }
474 }
475 }
476}
477
478/* GL locking is done by the caller */
479static void shader_glsl_load_vsamplers(const struct wined3d_gl_info *gl_info,
480 DWORD *tex_unit_map, GLhandleARB programId)
481{
482 GLint name_loc;
483 char sampler_name[20];
484 int i;
485
486 for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i) {
487 snprintf(sampler_name, sizeof(sampler_name), "Vsampler%d", i);
488 name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
489 if (name_loc != -1) {
490 DWORD mapped_unit = tex_unit_map[MAX_FRAGMENT_SAMPLERS + i];
491 if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.combined_samplers)
492 {
493 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
494 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
495 checkGLcall("glUniform1iARB");
496 } else {
497 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
498 }
499 }
500 }
501}
502
503/* GL locking is done by the caller */
504static inline void walk_constant_heap(const struct wined3d_gl_info *gl_info, const float *constants,
505 const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
506{
507 int stack_idx = 0;
508 unsigned int heap_idx = 1;
509 unsigned int idx;
510
511 if (heap->entries[heap_idx].version <= version) return;
512
513 idx = heap->entries[heap_idx].idx;
514 if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
515 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
516
517 while (stack_idx >= 0)
518 {
519 /* Note that we fall through to the next case statement. */
520 switch(stack[stack_idx])
521 {
522 case HEAP_NODE_TRAVERSE_LEFT:
523 {
524 unsigned int left_idx = heap_idx << 1;
525 if (left_idx < heap->size && heap->entries[left_idx].version > version)
526 {
527 heap_idx = left_idx;
528 idx = heap->entries[heap_idx].idx;
529 if (constant_locations[idx] != -1)
530 GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
531
532 stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
533 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
534 break;
535 }
536 } RT_FALL_THRU();
537
538 case HEAP_NODE_TRAVERSE_RIGHT:
539 {
540 unsigned int right_idx = (heap_idx << 1) + 1;
541 if (right_idx < heap->size && heap->entries[right_idx].version > version)
542 {
543 heap_idx = right_idx;
544 idx = heap->entries[heap_idx].idx;
545 if (constant_locations[idx] != -1)
546 GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
547
548 stack[stack_idx++] = HEAP_NODE_POP;
549 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
550 break;
551 }
552 } RT_FALL_THRU();
553
554 case HEAP_NODE_POP:
555 {
556 heap_idx >>= 1;
557 --stack_idx;
558 break;
559 }
560 }
561 }
562 checkGLcall("walk_constant_heap()");
563}
564
565/* GL locking is done by the caller */
566static inline void apply_clamped_constant(const struct wined3d_gl_info *gl_info, GLint location, const GLfloat *data)
567{
568 GLfloat clamped_constant[4];
569
570 if (location == -1) return;
571
572 clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0f ? 1.0f : data[0];
573 clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0f ? 1.0f : data[1];
574 clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0f ? 1.0f : data[2];
575 clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0f ? 1.0f : data[3];
576
577 GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant));
578}
579
580/* GL locking is done by the caller */
581static inline void walk_constant_heap_clamped(const struct wined3d_gl_info *gl_info, const float *constants,
582 const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
583{
584 int stack_idx = 0;
585 unsigned int heap_idx = 1;
586 unsigned int idx;
587
588 if (heap->entries[heap_idx].version <= version) return;
589
590 idx = heap->entries[heap_idx].idx;
591 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
592 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
593
594 while (stack_idx >= 0)
595 {
596 /* Note that we fall through to the next case statement. */
597 switch(stack[stack_idx])
598 {
599 case HEAP_NODE_TRAVERSE_LEFT:
600 {
601 unsigned int left_idx = heap_idx << 1;
602 if (left_idx < heap->size && heap->entries[left_idx].version > version)
603 {
604 heap_idx = left_idx;
605 idx = heap->entries[heap_idx].idx;
606 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
607
608 stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
609 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
610 break;
611 }
612 } RT_FALL_THRU();
613
614 case HEAP_NODE_TRAVERSE_RIGHT:
615 {
616 unsigned int right_idx = (heap_idx << 1) + 1;
617 if (right_idx < heap->size && heap->entries[right_idx].version > version)
618 {
619 heap_idx = right_idx;
620 idx = heap->entries[heap_idx].idx;
621 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
622
623 stack[stack_idx++] = HEAP_NODE_POP;
624 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
625 break;
626 }
627 } RT_FALL_THRU();
628
629 case HEAP_NODE_POP:
630 {
631 heap_idx >>= 1;
632 --stack_idx;
633 break;
634 }
635 }
636 }
637 checkGLcall("walk_constant_heap_clamped()");
638}
639
640/* Loads floating point constants (aka uniforms) into the currently set GLSL program. */
641/* GL locking is done by the caller */
642static void shader_glsl_load_constantsF(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
643 const float *constants, const GLint *constant_locations, const struct constant_heap *heap,
644 unsigned char *stack, UINT version)
645{
646 const local_constant *lconst;
647
648 /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
649 if (This->baseShader.reg_maps.shader_version.major == 1
650 && shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type))
651 walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
652 else
653 walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
654
655 if (!This->baseShader.load_local_constsF)
656 {
657 TRACE("No need to load local float constants for this shader\n");
658 return;
659 }
660
661 /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
662 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry)
663 {
664 GLint location = constant_locations[lconst->idx];
665 /* We found this uniform name in the program - go ahead and send the data */
666 if (location != -1) GL_EXTCALL(glUniform4fvARB(location, 1, (const GLfloat *)lconst->value));
667 }
668 checkGLcall("glUniform4fvARB()");
669}
670
671/* Loads integer constants (aka uniforms) into the currently set GLSL program. */
672/* GL locking is done by the caller */
673static void shader_glsl_load_constantsI(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
674 const GLint locations[MAX_CONST_I], const int *constants, WORD constants_set)
675{
676 unsigned int i;
677 struct list* ptr;
678
679 for (i = 0; constants_set; constants_set >>= 1, ++i)
680 {
681 if (!(constants_set & 1)) continue;
682
683 TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n",
684 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);
685
686 /* We found this uniform name in the program - go ahead and send the data */
687 GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4]));
688 checkGLcall("glUniform4ivARB");
689 }
690
691 /* Load immediate constants */
692 ptr = list_head(&This->baseShader.constantsI);
693 while (ptr) {
694 const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
695 unsigned int idx = lconst->idx;
696 const GLint *values = (const GLint *)lconst->value;
697
698 TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
699 values[0], values[1], values[2], values[3]);
700
701 /* We found this uniform name in the program - go ahead and send the data */
702 GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values));
703 checkGLcall("glUniform4ivARB");
704 ptr = list_next(&This->baseShader.constantsI, ptr);
705 }
706}
707
708/* Loads boolean constants (aka uniforms) into the currently set GLSL program. */
709/* GL locking is done by the caller */
710static void shader_glsl_load_constantsB(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
711 GLhandleARB programId, const BOOL *constants, WORD constants_set)
712{
713 GLint tmp_loc;
714 unsigned int i;
715 char tmp_name[8];
716 const char *prefix;
717 struct list* ptr;
718
719 switch (This->baseShader.reg_maps.shader_version.type)
720 {
721 case WINED3D_SHADER_TYPE_VERTEX:
722 prefix = "VB";
723 break;
724
725 case WINED3D_SHADER_TYPE_GEOMETRY:
726 prefix = "GB";
727 break;
728
729 case WINED3D_SHADER_TYPE_PIXEL:
730 prefix = "PB";
731 break;
732
733 default:
734 FIXME("Unknown shader type %#x.\n",
735 This->baseShader.reg_maps.shader_version.type);
736 prefix = "UB";
737 break;
738 }
739
740 /* TODO: Benchmark and see if it would be beneficial to store the
741 * locations of the constants to avoid looking up each time */
742 for (i = 0; constants_set; constants_set >>= 1, ++i)
743 {
744 if (!(constants_set & 1)) continue;
745
746 TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]);
747
748 /* TODO: Benchmark and see if it would be beneficial to store the
749 * locations of the constants to avoid looking up each time */
750 snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
751 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
752 if (tmp_loc != -1)
753 {
754 /* We found this uniform name in the program - go ahead and send the data */
755 GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i]));
756 checkGLcall("glUniform1ivARB");
757 }
758 }
759
760 /* Load immediate constants */
761 ptr = list_head(&This->baseShader.constantsB);
762 while (ptr) {
763 const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
764 unsigned int idx = lconst->idx;
765 const GLint *values = (const GLint *)lconst->value;
766
767 TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
768
769 snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
770 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
771 if (tmp_loc != -1) {
772 /* We found this uniform name in the program - go ahead and send the data */
773 GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
774 checkGLcall("glUniform1ivARB");
775 }
776 ptr = list_next(&This->baseShader.constantsB, ptr);
777 }
778}
779
780static void reset_program_constant_version(struct wine_rb_entry *entry, void *context)
781{
782 WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry)->constant_version = 0;
783}
784
785static const struct ps_np2fixup_info * get_fixup_info(const IWineD3DPixelShaderImpl *shader, UINT inp2fixup_info)
786{
787 struct glsl_pshader_private *shader_data = shader->baseShader.backend_data;
788
789 if (inp2fixup_info == WINEFIXUPINFO_NOINDEX)
790 return NULL;
791
792 if (!shader->baseShader.backend_data)
793 {
794 ERR("no backend data\n");
795 return NULL;
796 }
797 shader_data = shader->baseShader.backend_data;
798
799 if (inp2fixup_info >= shader_data->num_gl_shaders)
800 {
801 ERR("invalid index\n");
802 return NULL;
803 }
804
805 return &shader_data->gl_shaders[inp2fixup_info].np2fixup;
806}
807
808/**
809 * Loads the texture dimensions for NP2 fixup into the currently set GLSL program.
810 */
811/* GL locking is done by the caller (state handler) */
812static void shader_glsl_load_np2fixup_constants(
813 IWineD3DDevice* device,
814 char usePixelShader,
815 char useVertexShader) {
816
817 const IWineD3DDeviceImpl* deviceImpl = (const IWineD3DDeviceImpl*) device;
818 const struct glsl_shader_prog_link* prog = ((struct shader_glsl_priv *)(deviceImpl->shader_priv))->glsl_program;
819
820 if (!prog) {
821 /* No GLSL program set - nothing to do. */
822 return;
823 }
824
825 if (!usePixelShader) {
826 /* NP2 texcoord fixup is (currently) only done for pixelshaders. */
827 return;
828 }
829
830 if (prog->ps_args.np2_fixup && -1 != prog->np2Fixup_location) {
831 const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info;
832 const IWineD3DStateBlockImpl* stateBlock = (const IWineD3DStateBlockImpl*) deviceImpl->stateBlock;
833 UINT i;
834 UINT fixup = prog->ps_args.np2_fixup;
835 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
836
837 const struct ps_np2fixup_info *np2Fixup_info = WINEFIXUPINFO_GET(prog);
838
839 for (i = 0; fixup; fixup >>= 1, ++i) {
840 const unsigned char idx = np2Fixup_info->idx[i];
841 const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i];
842 GLfloat* tex_dim = &np2fixup_constants[(idx >> 1) * 4];
843
844 if (!tex) {
845 FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n");
846 continue;
847 }
848
849 if (idx % 2) {
850 tex_dim[2] = tex->baseTexture.pow2Matrix[0]; tex_dim[3] = tex->baseTexture.pow2Matrix[5];
851 } else {
852 tex_dim[0] = tex->baseTexture.pow2Matrix[0]; tex_dim[1] = tex->baseTexture.pow2Matrix[5];
853 }
854 }
855
856 GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location, np2Fixup_info->num_consts, np2fixup_constants));
857 }
858}
859
860/**
861 * Loads the app-supplied constants into the currently set GLSL program.
862 */
863/* GL locking is done by the caller (state handler) */
864static void shader_glsl_load_constants(const struct wined3d_context *context,
865 char usePixelShader, char useVertexShader)
866{
867 const struct wined3d_gl_info *gl_info = context->gl_info;
868 IWineD3DDeviceImpl *device = context_get_device(context);
869 IWineD3DStateBlockImpl* stateBlock = device->stateBlock;
870 struct shader_glsl_priv *priv = device->shader_priv;
871
872 GLhandleARB programId;
873 struct glsl_shader_prog_link *prog = priv->glsl_program;
874 UINT constant_version;
875 int i;
876
877 if (!prog) {
878 /* No GLSL program set - nothing to do. */
879 return;
880 }
881 programId = prog->programId;
882 constant_version = prog->constant_version;
883
884 if (useVertexShader) {
885 IWineD3DBaseShaderImpl* vshader = (IWineD3DBaseShaderImpl*) stateBlock->vertexShader;
886
887 /* Load DirectX 9 float constants/uniforms for vertex shader */
888 shader_glsl_load_constantsF(vshader, gl_info, stateBlock->vertexShaderConstantF,
889 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
890
891 /* Load DirectX 9 integer constants/uniforms for vertex shader */
892 shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations, stateBlock->vertexShaderConstantI,
893 stateBlock->changed.vertexShaderConstantsI & vshader->baseShader.reg_maps.integer_constants);
894
895 /* Load DirectX 9 boolean constants/uniforms for vertex shader */
896 shader_glsl_load_constantsB(vshader, gl_info, programId, stateBlock->vertexShaderConstantB,
897 stateBlock->changed.vertexShaderConstantsB & vshader->baseShader.reg_maps.boolean_constants);
898
899 /* Upload the position fixup params */
900 GL_EXTCALL(glUniform4fvARB(prog->posFixup_location, 1, &device->posFixup[0]));
901 checkGLcall("glUniform4fvARB");
902 }
903
904 if (usePixelShader) {
905
906 IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;
907
908 /* Load DirectX 9 float constants/uniforms for pixel shader */
909 shader_glsl_load_constantsF(pshader, gl_info, stateBlock->pixelShaderConstantF,
910 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
911
912 /* Load DirectX 9 integer constants/uniforms for pixel shader */
913 shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations, stateBlock->pixelShaderConstantI,
914 stateBlock->changed.pixelShaderConstantsI & pshader->baseShader.reg_maps.integer_constants);
915
916 /* Load DirectX 9 boolean constants/uniforms for pixel shader */
917 shader_glsl_load_constantsB(pshader, gl_info, programId, stateBlock->pixelShaderConstantB,
918 stateBlock->changed.pixelShaderConstantsB & pshader->baseShader.reg_maps.boolean_constants);
919
920 /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from.
921 * It can't be 0 for a valid texbem instruction.
922 */
923 for(i = 0; i < MAX_TEXTURES; i++) {
924 const float *data;
925
926 if(prog->bumpenvmat_location[i] == -1) continue;
927
928 data = (const float *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVMAT00];
929 GL_EXTCALL(glUniformMatrix2fvARB(prog->bumpenvmat_location[i], 1, 0, data));
930 checkGLcall("glUniformMatrix2fvARB");
931
932 /* texbeml needs the luminance scale and offset too. If texbeml is used, needsbumpmat
933 * is set too, so we can check that in the needsbumpmat check
934 */
935 if(prog->luminancescale_location[i] != -1) {
936 const GLfloat *scale = (const GLfloat *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVLSCALE];
937 const GLfloat *offset = (const GLfloat *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVLOFFSET];
938
939 GL_EXTCALL(glUniform1fvARB(prog->luminancescale_location[i], 1, scale));
940 checkGLcall("glUniform1fvARB");
941 GL_EXTCALL(glUniform1fvARB(prog->luminanceoffset_location[i], 1, offset));
942 checkGLcall("glUniform1fvARB");
943 }
944 }
945
946 if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {
947 float correction_params[4];
948
949 if (context->render_offscreen)
950 {
951 correction_params[0] = 0.0f;
952 correction_params[1] = 1.0f;
953 } else {
954 /* position is window relative, not viewport relative */
955#ifdef VBOX_WITH_VMSVGA
956 correction_params[0] = device->rtHeight;
957#else
958 correction_params[0] = ((IWineD3DSurfaceImpl *)context->current_rt)->currentDesc.Height;
959#endif
960 correction_params[1] = -1.0f;
961 }
962 GL_EXTCALL(glUniform4fvARB(prog->ycorrection_location, 1, correction_params));
963 }
964 }
965
966 if (priv->next_constant_version == UINT_MAX)
967 {
968 TRACE("Max constant version reached, resetting to 0.\n");
969 wine_rb_for_each_entry(&priv->program_lookup, reset_program_constant_version, NULL);
970 priv->next_constant_version = 1;
971 }
972 else
973 {
974 prog->constant_version = priv->next_constant_version++;
975 }
976}
977
978static inline void update_heap_entry(struct constant_heap *heap, unsigned int idx,
979 unsigned int heap_idx, DWORD new_version)
980{
981 struct constant_entry *entries = heap->entries;
982 unsigned int *positions = heap->positions;
983 unsigned int parent_idx;
984
985 while (heap_idx > 1)
986 {
987 parent_idx = heap_idx >> 1;
988
989 if (new_version <= entries[parent_idx].version) break;
990
991 entries[heap_idx] = entries[parent_idx];
992 positions[entries[parent_idx].idx] = heap_idx;
993 heap_idx = parent_idx;
994 }
995
996 entries[heap_idx].version = new_version;
997 entries[heap_idx].idx = idx;
998 positions[idx] = heap_idx;
999}
1000
1001static void shader_glsl_update_float_vertex_constants(IWineD3DDevice *iface, UINT start, UINT count)
1002{
1003 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
1004 struct shader_glsl_priv *priv = This->shader_priv;
1005 struct constant_heap *heap = &priv->vconst_heap;
1006 UINT i;
1007
1008 for (i = start; i < count + start; ++i)
1009 {
1010 if (!This->stateBlock->changed.vertexShaderConstantsF[i])
1011 update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
1012 else
1013 update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
1014 }
1015}
1016
1017static void shader_glsl_update_float_pixel_constants(IWineD3DDevice *iface, UINT start, UINT count)
1018{
1019 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
1020 struct shader_glsl_priv *priv = This->shader_priv;
1021 struct constant_heap *heap = &priv->pconst_heap;
1022 UINT i;
1023
1024 for (i = start; i < count + start; ++i)
1025 {
1026 if (!This->stateBlock->changed.pixelShaderConstantsF[i])
1027 update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
1028 else
1029 update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
1030 }
1031}
1032
1033static unsigned int vec4_varyings(DWORD shader_major, const struct wined3d_gl_info *gl_info)
1034{
1035 unsigned int ret = gl_info->limits.glsl_varyings / 4;
1036 /* 4.0 shaders do not write clip coords because d3d10 does not support user clipplanes */
1037 if(shader_major > 3) return ret;
1038
1039 /* 3.0 shaders may need an extra varying for the clip coord on some cards(mostly dx10 ones) */
1040 if (gl_info->quirks & WINED3D_QUIRK_GLSL_CLIP_VARYING) ret -= 1;
1041 return ret;
1042}
1043
/**
 * Generate the variable & register declarations for the GLSL output target.
 *
 * Everything is appended to 'buffer' with shader_addline(), in this order:
 * subroutine prototypes, (VBOX_WITH_VMSVGA only: texture samplers), the
 * float / int / bool constant uniform arrays, the vertex- or pixel-shader
 * specific uniforms, (non-VMSVGA: texture samplers), the NP2 fixup uniform,
 * address / texcoord / input / output / temporary / attribute / loop
 * variables, helper temporaries, local constant uniforms, the FLT_MAX constant
 * and finally the opening of main(). The function returns with main() still
 * open; the closing brace is not emitted here.
 *
 * Side effects besides writing to 'buffer':
 *  - may set vpos_uniform on the pixel shader when a "ycorrection" uniform is declared;
 *  - fills ctx_priv->cur_np2fixup_info (idx[] and num_consts) when the pixel
 *    shader compile args request NP2 fixup.
 *
 * @param context   Context; gl_info and render_offscreen are read from it.
 * @param buffer    Shader text buffer the declarations are appended to.
 * @param iface     The shader being compiled (cast to IWineD3DBaseShaderImpl).
 * @param reg_maps  Register usage maps that drive which declarations are emitted.
 * @param ctx_priv  Per-compile GLSL backend data (cur_ps_args, cur_vs_args, cur_np2fixup_info).
 */
static void shader_generate_glsl_declarations(const struct wined3d_context *context,
        struct wined3d_shader_buffer *buffer, IWineD3DBaseShader *iface,
        const shader_reg_maps *reg_maps, struct shader_glsl_ctx_priv *ctx_priv)
{
    IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
    /* Only dereferenced on the pixel shader paths below. */
    const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args;
    const struct wined3d_gl_info *gl_info = context->gl_info;
    /* extra_constants_needed counts pixel shader uniforms declared in addition to the float constant array. */
    unsigned int i, extra_constants_needed = 0;
    const local_constant *lconst;
    DWORD map;

    /* There are some minor differences between pixel and vertex shaders */
    char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
    /* All per-shader-type identifiers are prefixed with 'P' (pixel) or 'V' (vertex). */
    char prefix = pshader ? 'P' : 'V';

    /* Prototype the subroutines */
    for (i = 0, map = reg_maps->labels; map; map >>= 1, ++i)
    {
        if (map & 1) shader_addline(buffer, "void subroutine%u();\n", i);
    }

#ifdef VBOX_WITH_VMSVGA
    /* Declare texture samplers before the constants in order to workaround a NVidia driver quirk. */
    for (i = 0; i < This->baseShader.limits.sampler; i++) {
        if (reg_maps->sampler_type[i])
        {
            switch (reg_maps->sampler_type[i])
            {
                case WINED3DSTT_1D:
                    shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i);
                    break;
                case WINED3DSTT_2D:
                    /* A 2D sampler is declared as sampler2DRect when the currently bound texture is a rectangle texture. */
                    if(device->stateBlock->textures[i] &&
                            IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[i]) == GL_TEXTURE_RECTANGLE_ARB) {
                        shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i);
                    } else {
                        shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
                    }
                    break;
                case WINED3DSTT_CUBE:
                    shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
                    break;
                case WINED3DSTT_VOLUME:
                    shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i);
                    break;
                default:
                    shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i);
                    FIXME("Unrecognized sampler type: %#x\n", reg_maps->sampler_type[i]);
                    break;
            }
        }
    }
#endif

    /* Declare the constants (aka uniforms) */
    if (This->baseShader.limits.constant_float > 0) {
        unsigned max_constantsF;
        /* Unless the shader uses indirect addressing, always declare the maximum array size and ignore that we need some
         * uniforms privately. E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup and immediate values, still
         * declare VC[256]. If the shader needs more uniforms than we have it won't work in any case. If it uses less, the
         * compiler will figure out which uniforms are really used and strip them out. This allows a shader to use c255 on
         * a dx9 card, as long as it doesn't also use all the other constants.
         *
         * If the shader uses indirect addressing the compiler must assume that all declared uniforms are used. In this case,
         * declare only the amount that we're assured to have.
         *
         * Thus we run into problems in these two cases:
         * 1) The shader really uses more uniforms than supported
         * 2) The shader uses indirect addressing, less constants than supported, but uses a constant index > #supported consts
         */
        if (pshader)
        {
            /* No indirect addressing here. */
            max_constantsF = gl_info->limits.glsl_ps_float_constants;
        }
        else
        {
#ifndef VBOX_WITH_VMSVGA
            if(This->baseShader.reg_maps.usesrelconstF) {
#else
            /* If GL supports only 256 constants (seen on macOS drivers for compatibility profile, which we use),
             * then ignore the need for potential uniforms and always declare VC[256].
             * This allows to compile Windows 10 shader which use hardcoded constants at 250+ index range.
             * Fixes drawing problems on Windows 10 desktop.
             *
             * This hack is normally active only on macOS, because Windows and Linux OpenGL drivers
             * have a more usable limit for GL compatibility context (1024+).
             */
            if (This->baseShader.reg_maps.usesrelconstF && gl_info->limits.glsl_vs_float_constants > 256) {
#endif
                /* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix).
                 * Subtract another uniform for immediate values, which have to be loaded via uniform by the driver as well.
                 * The shader code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex shader code, so one vec4 should be enough
                 * (Unfortunately the Nvidia driver doesn't store 128 and -128 in one float).
                 *
                 * Writing gl_ClipVertex requires one uniform for each clipplane as well.
                 */
#ifdef VBOX_WITH_WDDM
                if (gl_info->limits.glsl_vs_float_constants == 256)
                {
                    DWORD dwVersion = GetVersion();
                    DWORD dwMajor = (DWORD)(LOBYTE(LOWORD(dwVersion)));
                    DWORD dwMinor = (DWORD)(HIBYTE(LOWORD(dwVersion)));
                    /* tmp workaround Win8 Aero requirement for 256 */
                    /* NOTE(review): this test is also true for any version with minor > 1 regardless of
                     * the major number (e.g. 5.2) - confirm that this is intended. */
                    if (dwMajor > 6 || dwMinor > 1)
                    {
                        /* tmp work-around to make Internet Explorer in win8 work with GPU supporting only with 256 shader uniform vars
                         * @todo: make it more robust */
                        max_constantsF = gl_info->limits.glsl_vs_float_constants - 1;
                    }
                    else
                        max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
                }
                else
#endif
                {
                    max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
                }

                if(ctx_priv->cur_vs_args->clip_enabled)
                {
                    max_constantsF -= gl_info->limits.clipplanes;
                }
                max_constantsF -= count_bits(This->baseShader.reg_maps.integer_constants);
                /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
                 * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
                 * for now take this into account when calculating the number of available constants
                 */
                max_constantsF -= count_bits(This->baseShader.reg_maps.boolean_constants);
                /* Set by driver quirks in directx.c */
                max_constantsF -= gl_info->reserved_glsl_constants;
            }
            else
            {
                max_constantsF = gl_info->limits.glsl_vs_float_constants;
            }
        }
        /* Never declare more constants than the shader version allows. */
        max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF);
        shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
    }

    /* Always declare the full set of constants, the compiler can remove the unused ones because d3d doesn't(yet)
     * support indirect int and bool constant addressing. This avoids problems if the app uses e.g. i0 and i9.
     */
    if (This->baseShader.limits.constant_int > 0 && This->baseShader.reg_maps.integer_constants)
        shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);

    if (This->baseShader.limits.constant_bool > 0 && This->baseShader.reg_maps.boolean_constants)
        shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);

    if(!pshader) {
        shader_addline(buffer, "uniform vec4 posFixup;\n");
        /* Predeclaration; This function is added at link time based on the pixel shader.
         * VS 3.0 shaders have an array OUT[] the shader writes to, earlier versions don't have
         * that. We know the input to the reorder function at vertex shader compile time, so
         * we can deal with that. The reorder function for a 1.x and 2.x vertex shader can just
         * read gl_FrontColor. The output depends on the pixel shader. The reorder function for a
         * 1.x and 2.x pshader or for fixed function will write gl_FrontColor, and for a 3.0 shader
         * it will write to the varying array. Here we depend on the shader optimizer on sorting that
         * out. The nvidia driver only does that if the parameter is inout instead of out, hence the
         * inout.
         */
        if (reg_maps->shader_version.major >= 3)
        {
            shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", MAX_REG_OUTPUT);
        } else {
            shader_addline(buffer, "void order_ps_input();\n");
        }
    } else {
        /* One bump environment matrix uniform per bit set in reg_maps->bumpmat, plus the
         * luminance scale / offset pair for bits that are also set in luminanceparams. */
        for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
        {
            if (!(map & 1)) continue;

            shader_addline(buffer, "uniform mat2 bumpenvmat%d;\n", i);

            if (reg_maps->luminanceparams & (1 << i))
            {
                shader_addline(buffer, "uniform float luminancescale%d;\n", i);
                shader_addline(buffer, "uniform float luminanceoffset%d;\n", i);
                extra_constants_needed++;
            }

            extra_constants_needed++;
        }

        if (ps_args->srgb_correction)
        {
            shader_addline(buffer, "const vec4 srgb_const0 = vec4(%.8e, %.8e, %.8e, %.8e);\n",
                    srgb_pow, srgb_mul_high, srgb_sub_high, srgb_mul_low);
            shader_addline(buffer, "const vec4 srgb_const1 = vec4(%.8e, 0.0, 0.0, 0.0);\n",
                    srgb_cmp);
        }
        if (reg_maps->vpos || reg_maps->usesdsy)
        {
            /* Use a uniform for the y correction only if there is still room next to the float
             * constants and the extra uniforms counted above. */
            if (This->baseShader.limits.constant_float + extra_constants_needed
                    + 1 < gl_info->limits.glsl_ps_float_constants)
            {
                shader_addline(buffer, "uniform vec4 ycorrection;\n");
                ((IWineD3DPixelShaderImpl *) This)->vpos_uniform = 1;
                extra_constants_needed++;
            } else {
                /* This happens because we do not have proper tracking of the constant registers that are
                 * actually used, only the max limit of the shader version
                 */
                FIXME("Cannot find a free uniform for vpos correction params\n");
                AssertFailed();
                /* Fallback: hardcode the correction values, taken from the current render target
                 * height and offscreen state, into the shader text. */
                shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n",
                        context->render_offscreen ? 0.0f : ((IWineD3DSurfaceImpl *)device->render_targets[0])->currentDesc.Height,
                        context->render_offscreen ? 1.0f : -1.0f);
            }
            shader_addline(buffer, "vec4 vpos;\n");
        }
    }

#ifdef VBOX_WITH_VMSVGA
    /* Declare texture samplers before the constants in order to workaround a NVidia driver quirk. */
#else
    /* Declare texture samplers */
    for (i = 0; i < This->baseShader.limits.sampler; i++) {
        if (reg_maps->sampler_type[i])
        {
            switch (reg_maps->sampler_type[i])
            {
                case WINED3DSTT_1D:
                    shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i);
                    break;
                case WINED3DSTT_2D:
                    /* A 2D sampler is declared as sampler2DRect when the currently bound texture is a rectangle texture. */
                    if(device->stateBlock->textures[i] &&
                            IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[i]) == GL_TEXTURE_RECTANGLE_ARB) {
                        shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i);
                    } else {
                        shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
                    }
                    break;
                case WINED3DSTT_CUBE:
                    shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
                    break;
                case WINED3DSTT_VOLUME:
                    shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i);
                    break;
                default:
                    shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i);
                    FIXME("Unrecognized sampler type: %#x\n", reg_maps->sampler_type[i]);
                    break;
            }
        }
    }
#endif

    /* Declare uniforms for NP2 texcoord fixup:
     * This is NOT done inside the loop that declares the texture samplers since the NP2 fixup code
     * is currently only used for the GeforceFX series and when forcing the ARB_npot extension off.
     * Modern cards just skip the code anyway, so put it inside a separate loop. */
    if (pshader && ps_args->np2_fixup) {

        struct ps_np2fixup_info* const fixup = ctx_priv->cur_np2fixup_info;
        UINT cur = 0;

        /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height]
         * while D3D has them in the (normalized) [0,1]x[0,1] range.
         * samplerNP2Fixup stores texture dimensions and is updated through
         * shader_glsl_load_np2fixup_constants when the sampler changes. */

        for (i = 0; i < This->baseShader.limits.sampler; ++i) {
            if (reg_maps->sampler_type[i]) {
                if (!(ps_args->np2_fixup & (1 << i))) continue;

                if (WINED3DSTT_2D != reg_maps->sampler_type[i]) {
                    FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n");
                    continue;
                }

                /* Assign consecutive fixup slots to the samplers that need the fixup. */
                fixup->idx[i] = cur++;
            }
        }

        /* Number of vec4 uniforms: half the number of fixup slots, rounded up. */
        fixup->num_consts = (cur + 1) >> 1;
        shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup[%u];\n", prefix, fixup->num_consts);
    }

    /* Declare address variables */
    for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
    {
        if (map & 1) shader_addline(buffer, "ivec4 A%u;\n", i);
    }

    /* Declare texture coordinate temporaries and initialize them */
    for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
    {
        if (map & 1) shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i);
    }

    /* Declare input register varyings. Only pixel shader, vertex shaders have that declared in the
     * helper function shader that is linked in at link time
     */
    if (pshader && reg_maps->shader_version.major >= 3)
    {
        if (use_vs(device->stateBlock))
        {
            shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(reg_maps->shader_version.major, gl_info));
        } else {
            /* TODO: Write a replacement shader for the fixed function vertex pipeline, so this isn't needed.
             * For fixed function vertex processing + 3.0 pixel shader we need a separate function in the
             * pixel shader that reads the fixed function color into the packed input registers.
             */
            shader_addline(buffer, "vec4 IN[%u];\n", vec4_varyings(reg_maps->shader_version.major, gl_info));
        }
    }

    /* Declare output register temporaries */
    if(This->baseShader.limits.packed_output) {
        shader_addline(buffer, "vec4 OUT[%u];\n", This->baseShader.limits.packed_output);
    }

    /* Declare temporary variables */
    for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
    {
        if (map & 1) shader_addline(buffer, "vec4 R%u;\n", i);
    }

    /* Declare attributes */
    if (reg_maps->shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
    {
        for (i = 0, map = reg_maps->input_registers; map; map >>= 1, ++i)
        {
            if (map & 1) shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
        }
    }

    /* Declare loop registers aLx */
    for (i = 0; i < reg_maps->loop_depth; i++) {
        shader_addline(buffer, "int aL%u;\n", i);
        shader_addline(buffer, "int tmpInt%u;\n", i);
    }

    /* Temporary variables for matrix operations */
    shader_addline(buffer, "vec4 tmp0;\n");
    shader_addline(buffer, "vec4 tmp1;\n");
#ifdef VBOX_WITH_VMSVGA
    /* Predicate register, one bool per component. */
    shader_addline(buffer, "bool p0[4];\n");
#endif

    /* Local constants use a different name so they can be loaded once at shader link time
     * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
     * float -> string conversion can cause precision loss.
     */
    if(!This->baseShader.load_local_constsF) {
        LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
            shader_addline(buffer, "uniform vec4 %cLC%u;\n", prefix, lconst->idx);
        }
    }

    shader_addline(buffer, "const float FLT_MAX = 1e38;\n");

    /* Start the main program */
    shader_addline(buffer, "void main() {\n");
    if(pshader && reg_maps->vpos) {
        /* DirectX apps expect integer values, while OpenGL drivers add approximately 0.5. This causes
         * off-by-one problems as spotted by the vPos d3d9 visual test. Unfortunately the ATI cards do
         * not add exactly 0.5, but rather something like 0.49999999 or 0.50000001, which still causes
         * precision troubles when we just subtract 0.5.
         *
         * To deal with that just floor() the position. This will eliminate the fraction on all cards.
         *
         * TODO: Test how that behaves with multisampling once we can enable multisampling in winex11.
         *
         * An advantage of floor is that it works even if the driver doesn't add 1/2. It is somewhat
         * questionable if 1.5, 2.5, ... are the proper values to return in gl_FragCoord, even though
         * coordinates specify the pixel centers instead of the pixel corners. This code will behave
         * correctly on drivers that returns integer values.
         */
        shader_addline(buffer, "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n");
    }
}
1420
1421/*****************************************************************************
1422 * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
1423 *
1424 * For more information, see http://wiki.winehq.org/DirectX-Shaders
1425 ****************************************************************************/
1426
1427/* Prototypes */
1428static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1429 const struct wined3d_shader_src_param *wined3d_src, DWORD mask, glsl_src_param_t *glsl_src);
1430
/** Used for opcode modifiers - They multiply the result by the specified amount.
 *
 * Each entry is a GLSL expression prefix ("<factor> * ") for the shift value
 * noted next to it. Entries 6 through 11 are empty strings, so no scale factor
 * is emitted for those shift values. */
static const char * const shift_glsl_tab[] = {
    "",           /*  0 (none) */
    "2.0 * ",     /*  1 (x2)   */
    "4.0 * ",     /*  2 (x4)   */
    "8.0 * ",     /*  3 (x8)   */
    "16.0 * ",    /*  4 (x16)  */
    "32.0 * ",    /*  5 (x32)  */
    "",           /*  6 (x64)  */
    "",           /*  7 (x128) */
    "",           /*  8 (d256) */
    "",           /*  9 (d128) */
    "",           /* 10 (d64)  */
    "",           /* 11 (d32)  */
    "0.0625 * ",  /* 12 (d16)  */
    "0.125 * ",   /* 13 (d8)   */
    "0.25 * ",    /* 14 (d4)   */
    "0.5 * "      /* 15 (d2)   */
};
1450
1451/* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
1452static void shader_glsl_gen_modifier(DWORD src_modifier, const char *in_reg, const char *in_regswizzle, char *out_str)
1453{
1454 out_str[0] = 0;
1455
1456 switch (src_modifier)
1457 {
1458 case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
1459 case WINED3DSPSM_DW:
1460 case WINED3DSPSM_NONE:
1461 sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1462 break;
1463 case WINED3DSPSM_NEG:
1464 sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
1465 break;
1466 case WINED3DSPSM_NOT:
1467 sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
1468 break;
1469 case WINED3DSPSM_BIAS:
1470 sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1471 break;
1472 case WINED3DSPSM_BIASNEG:
1473 sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1474 break;
1475 case WINED3DSPSM_SIGN:
1476 sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1477 break;
1478 case WINED3DSPSM_SIGNNEG:
1479 sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1480 break;
1481 case WINED3DSPSM_COMP:
1482 sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
1483 break;
1484 case WINED3DSPSM_X2:
1485 sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
1486 break;
1487 case WINED3DSPSM_X2NEG:
1488 sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
1489 break;
1490 case WINED3DSPSM_ABS:
1491 sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
1492 break;
1493 case WINED3DSPSM_ABSNEG:
1494 sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
1495 break;
1496 default:
1497 FIXME("Unhandled modifier %u\n", src_modifier);
1498 sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1499 }
1500}
1501
1502/** Writes the GLSL variable name that corresponds to the register that the
1503 * DX opcode parameter is trying to access */
1504static void shader_glsl_get_register_name(const struct wined3d_shader_register *reg,
1505 char *register_name, BOOL *is_color, const struct wined3d_shader_instruction *ins)
1506{
1507 /* oPos, oFog and oPts in D3D */
1508 static const char * const hwrastout_reg_names[] = { "gl_Position", "gl_FogFragCoord", "gl_PointSize" };
1509
1510 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
1511 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
1512 char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
1513
1514 *is_color = FALSE;
1515
1516 switch (reg->type)
1517 {
1518 case WINED3DSPR_TEMP:
1519 sprintf(register_name, "R%u", reg->idx);
1520 break;
1521
1522 case WINED3DSPR_INPUT:
1523 /* vertex shaders */
1524 if (!pshader)
1525 {
1526 struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
1527 if (priv->cur_vs_args->swizzle_map & (1 << reg->idx)) *is_color = TRUE;
1528 sprintf(register_name, "attrib%u", reg->idx);
1529 break;
1530 }
1531
1532 /* pixel shaders >= 3.0 */
1533 if (This->baseShader.reg_maps.shader_version.major >= 3)
1534 {
1535 DWORD idx = ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg->idx];
1536 unsigned int in_count = vec4_varyings(This->baseShader.reg_maps.shader_version.major, gl_info);
1537
1538 if (reg->rel_addr)
1539 {
1540 glsl_src_param_t rel_param;
1541
1542 shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
1543
1544 /* Removing a + 0 would be an obvious optimization, but macos doesn't see the NOP
1545 * operation there */
1546 if (idx)
1547 {
1548 if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count)
1549 {
1550 sprintf(register_name,
1551 "((%s + %u) > %d ? (%s + %u) > %d ? gl_SecondaryColor : gl_Color : IN[%s + %u])",
1552 rel_param.param_str, idx, in_count - 1, rel_param.param_str, idx, in_count,
1553 rel_param.param_str, idx);
1554 }
1555 else
1556 {
1557 sprintf(register_name, "IN[%s + %u]", rel_param.param_str, idx);
1558 }
1559 }
1560 else
1561 {
1562 if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count)
1563 {
1564 sprintf(register_name, "((%s) > %d ? (%s) > %d ? gl_SecondaryColor : gl_Color : IN[%s])",
1565 rel_param.param_str, in_count - 1, rel_param.param_str, in_count,
1566 rel_param.param_str);
1567 }
1568 else
1569 {
1570 sprintf(register_name, "IN[%s]", rel_param.param_str);
1571 }
1572 }
1573 }
1574 else
1575 {
1576 if (idx == in_count) sprintf(register_name, "gl_Color");
1577 else if (idx == in_count + 1) sprintf(register_name, "gl_SecondaryColor");
1578 else sprintf(register_name, "IN[%u]", idx);
1579 }
1580 }
1581 else
1582 {
1583 if (reg->idx == 0) strcpy(register_name, "gl_Color");
1584 else strcpy(register_name, "gl_SecondaryColor");
1585 break;
1586 }
1587 break;
1588
1589 case WINED3DSPR_CONST:
1590 {
1591 const char prefix = pshader ? 'P' : 'V';
1592
1593 /* Relative addressing */
1594 if (reg->rel_addr)
1595 {
1596 glsl_src_param_t rel_param;
1597 shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
1598 if (reg->idx) sprintf(register_name, "%cC[%s + %u]", prefix, rel_param.param_str, reg->idx);
1599 else sprintf(register_name, "%cC[%s]", prefix, rel_param.param_str);
1600 }
1601 else
1602 {
1603 if (shader_constant_is_local(This, reg->idx))
1604 sprintf(register_name, "%cLC%u", prefix, reg->idx);
1605 else
1606 sprintf(register_name, "%cC[%u]", prefix, reg->idx);
1607 }
1608 }
1609 break;
1610
1611 case WINED3DSPR_CONSTINT:
1612 if (pshader) sprintf(register_name, "PI[%u]", reg->idx);
1613 else sprintf(register_name, "VI[%u]", reg->idx);
1614 break;
1615
1616 case WINED3DSPR_CONSTBOOL:
1617 if (pshader) sprintf(register_name, "PB[%u]", reg->idx);
1618 else sprintf(register_name, "VB[%u]", reg->idx);
1619 break;
1620
1621 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1622 if (pshader) sprintf(register_name, "T%u", reg->idx);
1623 else sprintf(register_name, "A%u", reg->idx);
1624 break;
1625
1626 case WINED3DSPR_LOOP:
1627 sprintf(register_name, "aL%u", This->baseShader.cur_loop_regno - 1);
1628 break;
1629
1630 case WINED3DSPR_SAMPLER:
1631 if (pshader) sprintf(register_name, "Psampler%u", reg->idx);
1632 else sprintf(register_name, "Vsampler%u", reg->idx);
1633 break;
1634
1635 case WINED3DSPR_COLOROUT:
1636 if (reg->idx >= gl_info->limits.buffers)
1637 WARN("Write to render target %u, only %d supported.\n", reg->idx, gl_info->limits.buffers);
1638
1639 sprintf(register_name, "gl_FragData[%u]", reg->idx);
1640 break;
1641
1642 case WINED3DSPR_RASTOUT:
1643 if (reg->idx < RT_ELEMENTS(hwrastout_reg_names)) sprintf(register_name, "%s", hwrastout_reg_names[reg->idx]);
1644 else sprintf(register_name, "%s", hwrastout_reg_names[0]);
1645 break;
1646
1647 case WINED3DSPR_DEPTHOUT:
1648 sprintf(register_name, "gl_FragDepth");
1649 break;
1650
1651 case WINED3DSPR_ATTROUT:
1652 if (reg->idx == 0) sprintf(register_name, "gl_FrontColor");
1653 else sprintf(register_name, "gl_FrontSecondaryColor");
1654 break;
1655
1656 case WINED3DSPR_TEXCRDOUT:
1657 /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
1658 if (This->baseShader.reg_maps.shader_version.major >= 3) sprintf(register_name, "OUT[%u]", reg->idx);
1659 else sprintf(register_name, "gl_TexCoord[%u]", reg->idx);
1660 break;
1661
1662 case WINED3DSPR_MISCTYPE:
1663 if (reg->idx == 0)
1664 {
1665 /* vPos */
1666 sprintf(register_name, "vpos");
1667 }
1668 else if (reg->idx == 1)
1669 {
1670 /* Note that gl_FrontFacing is a bool, while vFace is
1671 * a float for which the sign determines front/back */
1672 sprintf(register_name, "(gl_FrontFacing ? 1.0 : -1.0)");
1673 }
1674 else
1675 {
1676 FIXME("Unhandled misctype register %d\n", reg->idx);
1677 sprintf(register_name, "unrecognized_register");
1678 }
1679 break;
1680
1681 case WINED3DSPR_IMMCONST:
1682 switch (reg->immconst_type)
1683 {
1684 case WINED3D_IMMCONST_FLOAT:
1685 sprintf(register_name, "%.8e", *(const float *)reg->immconst_data);
1686 break;
1687
1688 case WINED3D_IMMCONST_FLOAT4:
1689 sprintf(register_name, "vec4(%.8e, %.8e, %.8e, %.8e)",
1690 *(const float *)&reg->immconst_data[0], *(const float *)&reg->immconst_data[1],
1691 *(const float *)&reg->immconst_data[2], *(const float *)&reg->immconst_data[3]);
1692 break;
1693
1694 default:
1695 FIXME("Unhandled immconst type %#x\n", reg->immconst_type);
1696 sprintf(register_name, "<unhandled_immconst_type %#x>", reg->immconst_type);
1697 }
1698 break;
1699
1700#ifdef VBOX_WITH_VMSVGA
1701 case WINED3DSPR_PREDICATE:
1702 sprintf(register_name, "p0");
1703 break;
1704#endif
1705
1706 default:
1707 FIXME("Unhandled register name Type(%d)\n", reg->type);
1708 sprintf(register_name, "unrecognized_register");
1709 break;
1710 }
1711}
1712
1713static void shader_glsl_write_mask_to_str(DWORD write_mask, char *str)
1714{
1715 *str++ = '.';
1716 if (write_mask & WINED3DSP_WRITEMASK_0) *str++ = 'x';
1717 if (write_mask & WINED3DSP_WRITEMASK_1) *str++ = 'y';
1718 if (write_mask & WINED3DSP_WRITEMASK_2) *str++ = 'z';
1719 if (write_mask & WINED3DSP_WRITEMASK_3) *str++ = 'w';
1720 *str = '\0';
1721}
1722
1723/* Get the GLSL write mask for the destination register */
1724static DWORD shader_glsl_get_write_mask(const struct wined3d_shader_dst_param *param, char *write_mask)
1725{
1726 DWORD mask = param->write_mask;
1727
1728 if (shader_is_scalar(&param->reg))
1729 {
1730 mask = WINED3DSP_WRITEMASK_0;
1731 *write_mask = '\0';
1732 }
1733 else
1734 {
1735#ifdef VBOX_WITH_VMSVGA
1736 if (param->reg.type == WINED3DSPR_PREDICATE)
1737 {
1738 *write_mask++ = '[';
1739 if (mask & WINED3DSP_WRITEMASK_0) *write_mask++ = '0';
1740 else
1741 if (mask & WINED3DSP_WRITEMASK_1) *write_mask++ = '1';
1742 else
1743 if (mask & WINED3DSP_WRITEMASK_2) *write_mask++ = '2';
1744 else
1745 if (mask & WINED3DSP_WRITEMASK_3) *write_mask++ = '3';
1746 *write_mask++ = ']';
1747 *write_mask = '\0';
1748 }
1749 else
1750#endif
1751 shader_glsl_write_mask_to_str(mask, write_mask);
1752 }
1753
1754 return mask;
1755}
1756
1757static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) {
1758 unsigned int size = 0;
1759
1760 if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
1761 if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
1762 if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
1763 if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
1764
1765 return size;
1766}
1767
1768static void shader_glsl_swizzle_to_str(const DWORD swizzle, BOOL fixup, DWORD mask, char *str)
1769{
1770 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
1771 * but addressed as "rgba". To fix this we need to swap the register's x
1772 * and z components. */
1773 const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
1774
1775 *str++ = '.';
1776 /* swizzle bits fields: wwzzyyxx */
1777 if (mask & WINED3DSP_WRITEMASK_0) *str++ = swizzle_chars[swizzle & 0x03];
1778 if (mask & WINED3DSP_WRITEMASK_1) *str++ = swizzle_chars[(swizzle >> 2) & 0x03];
1779 if (mask & WINED3DSP_WRITEMASK_2) *str++ = swizzle_chars[(swizzle >> 4) & 0x03];
1780 if (mask & WINED3DSP_WRITEMASK_3) *str++ = swizzle_chars[(swizzle >> 6) & 0x03];
1781 *str = '\0';
1782}
1783
1784static void shader_glsl_get_swizzle(const struct wined3d_shader_src_param *param,
1785 BOOL fixup, DWORD mask, char *swizzle_str)
1786{
1787 if (shader_is_scalar(&param->reg))
1788 *swizzle_str = '\0';
1789 else
1790 shader_glsl_swizzle_to_str(param->swizzle, fixup, mask, swizzle_str);
1791}
1792
1793/* From a given parameter token, generate the corresponding GLSL string.
1794 * Also, return the actual register name and swizzle in case the
1795 * caller needs this information as well. */
1796static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1797 const struct wined3d_shader_src_param *wined3d_src, DWORD mask, glsl_src_param_t *glsl_src)
1798{
1799 BOOL is_color = FALSE;
1800 char swizzle_str[6];
1801
1802 glsl_src->reg_name[0] = '\0';
1803 glsl_src->param_str[0] = '\0';
1804 swizzle_str[0] = '\0';
1805
1806 shader_glsl_get_register_name(&wined3d_src->reg, glsl_src->reg_name, &is_color, ins);
1807 shader_glsl_get_swizzle(wined3d_src, is_color, mask, swizzle_str);
1808 shader_glsl_gen_modifier(wined3d_src->modifiers, glsl_src->reg_name, swizzle_str, glsl_src->param_str);
1809}
1810
1811/* From a given parameter token, generate the corresponding GLSL string.
1812 * Also, return the actual register name and swizzle in case the
1813 * caller needs this information as well. */
1814static DWORD shader_glsl_add_dst_param(const struct wined3d_shader_instruction *ins,
1815 const struct wined3d_shader_dst_param *wined3d_dst, glsl_dst_param_t *glsl_dst)
1816{
1817 BOOL is_color = FALSE;
1818
1819 glsl_dst->mask_str[0] = '\0';
1820 glsl_dst->reg_name[0] = '\0';
1821
1822 shader_glsl_get_register_name(&wined3d_dst->reg, glsl_dst->reg_name, &is_color, ins);
1823 return shader_glsl_get_write_mask(wined3d_dst, glsl_dst->mask_str);
1824}
1825
1826/* Append the destination part of the instruction to the buffer, return the effective write mask */
1827static DWORD shader_glsl_append_dst_ext(struct wined3d_shader_buffer *buffer,
1828 const struct wined3d_shader_instruction *ins, const struct wined3d_shader_dst_param *dst)
1829{
1830 glsl_dst_param_t glsl_dst;
1831 DWORD mask;
1832
1833 mask = shader_glsl_add_dst_param(ins, dst, &glsl_dst);
1834 if (mask) shader_addline(buffer, "%s%s = %s(", glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
1835
1836 return mask;
1837}
1838
1839/* Append the destination part of the instruction to the buffer, return the effective write mask */
1840static DWORD shader_glsl_append_dst(struct wined3d_shader_buffer *buffer, const struct wined3d_shader_instruction *ins)
1841{
1842 return shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
1843}
1844
1845/** Process GLSL instruction modifiers */
1846static void shader_glsl_add_instruction_modifiers(const struct wined3d_shader_instruction *ins)
1847{
1848 glsl_dst_param_t dst_param;
1849 DWORD modifiers;
1850
1851 if (!ins->dst_count) return;
1852
1853 modifiers = ins->dst[0].modifiers;
1854 if (!modifiers) return;
1855
1856 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
1857
1858 if (modifiers & WINED3DSPDM_SATURATE)
1859 {
1860 /* _SAT means to clamp the value of the register to between 0 and 1 */
1861 shader_addline(ins->ctx->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
1862 dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
1863 }
1864
1865 if (modifiers & WINED3DSPDM_MSAMPCENTROID)
1866 {
1867 FIXME("_centroid modifier not handled\n");
1868 }
1869
1870 if (modifiers & WINED3DSPDM_PARTIALPRECISION)
1871 {
1872 /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
1873 }
1874}
1875
1876static inline const char *shader_get_comp_op(DWORD op)
1877{
1878 switch (op) {
1879 case COMPARISON_GT: return ">";
1880 case COMPARISON_EQ: return "==";
1881 case COMPARISON_GE: return ">=";
1882 case COMPARISON_LT: return "<";
1883 case COMPARISON_NE: return "!=";
1884 case COMPARISON_LE: return "<=";
1885 default:
1886 FIXME("Unrecognized comparison value: %u\n", op);
1887 return "(\?\?)";
1888 }
1889}
1890
1891static void shader_glsl_get_sample_function(const struct wined3d_gl_info *gl_info,
1892 DWORD sampler_type, DWORD flags, glsl_sample_function_t *sample_function)
1893{
1894 BOOL projected = flags & WINED3D_GLSL_SAMPLE_PROJECTED;
1895 BOOL texrect = flags & WINED3D_GLSL_SAMPLE_RECT;
1896 BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
1897 BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD;
1898
1899 /* Note that there's no such thing as a projected cube texture. */
1900 switch(sampler_type) {
1901 case WINED3DSTT_1D:
1902 if(lod) {
1903 sample_function->name = projected ? "texture1DProjLod" : "texture1DLod";
1904 }
1905 else if (grad)
1906 {
1907 if (gl_info->supported[EXT_GPU_SHADER4])
1908 sample_function->name = projected ? "texture1DProjGrad" : "texture1DGrad";
1909 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1910 sample_function->name = projected ? "texture1DProjGradARB" : "texture1DGradARB";
1911 else
1912 {
1913 FIXME("Unsupported 1D grad function.\n");
1914 sample_function->name = "unsupported1DGrad";
1915 }
1916 }
1917 else
1918 {
1919 sample_function->name = projected ? "texture1DProj" : "texture1D";
1920 }
1921 sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
1922 break;
1923 case WINED3DSTT_2D:
1924 if(texrect) {
1925 if(lod) {
1926 sample_function->name = projected ? "texture2DRectProjLod" : "texture2DRectLod";
1927 }
1928 else if (grad)
1929 {
1930 if (gl_info->supported[EXT_GPU_SHADER4])
1931 sample_function->name = projected ? "texture2DRectProjGrad" : "texture2DRectGrad";
1932 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1933 sample_function->name = projected ? "texture2DRectProjGradARB" : "texture2DRectGradARB";
1934 else
1935 {
1936 FIXME("Unsupported RECT grad function.\n");
1937 sample_function->name = "unsupported2DRectGrad";
1938 }
1939 }
1940 else
1941 {
1942 sample_function->name = projected ? "texture2DRectProj" : "texture2DRect";
1943 }
1944 } else {
1945 if(lod) {
1946 sample_function->name = projected ? "texture2DProjLod" : "texture2DLod";
1947 }
1948 else if (grad)
1949 {
1950 if (gl_info->supported[EXT_GPU_SHADER4])
1951 sample_function->name = projected ? "texture2DProjGrad" : "texture2DGrad";
1952 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1953 sample_function->name = projected ? "texture2DProjGradARB" : "texture2DGradARB";
1954 else
1955 {
1956 FIXME("Unsupported 2D grad function.\n");
1957 sample_function->name = "unsupported2DGrad";
1958 }
1959 }
1960 else
1961 {
1962 sample_function->name = projected ? "texture2DProj" : "texture2D";
1963 }
1964 }
1965 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
1966 break;
1967 case WINED3DSTT_CUBE:
1968 if(lod) {
1969 sample_function->name = "textureCubeLod";
1970 }
1971 else if (grad)
1972 {
1973 if (gl_info->supported[EXT_GPU_SHADER4])
1974 sample_function->name = "textureCubeGrad";
1975 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1976 sample_function->name = "textureCubeGradARB";
1977 else
1978 {
1979 FIXME("Unsupported Cube grad function.\n");
1980 sample_function->name = "unsupportedCubeGrad";
1981 }
1982 }
1983 else
1984 {
1985 sample_function->name = "textureCube";
1986 }
1987 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1988 break;
1989 case WINED3DSTT_VOLUME:
1990 if(lod) {
1991 sample_function->name = projected ? "texture3DProjLod" : "texture3DLod";
1992 }
1993 else if (grad)
1994 {
1995 if (gl_info->supported[EXT_GPU_SHADER4])
1996 sample_function->name = projected ? "texture3DProjGrad" : "texture3DGrad";
1997 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1998 sample_function->name = projected ? "texture3DProjGradARB" : "texture3DGradARB";
1999 else
2000 {
2001 FIXME("Unsupported 3D grad function.\n");
2002 sample_function->name = "unsupported3DGrad";
2003 }
2004 }
2005 else
2006 {
2007 sample_function->name = projected ? "texture3DProj" : "texture3D";
2008 }
2009 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2010 break;
2011 default:
2012 sample_function->name = "";
2013 sample_function->coord_mask = 0;
2014 FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
2015 break;
2016 }
2017}
2018
2019static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
2020 BOOL sign_fixup, enum fixup_channel_source channel_source)
2021{
2022 switch(channel_source)
2023 {
2024 case CHANNEL_SOURCE_ZERO:
2025 strcat(arguments, "0.0");
2026 break;
2027
2028 case CHANNEL_SOURCE_ONE:
2029 strcat(arguments, "1.0");
2030 break;
2031
2032 case CHANNEL_SOURCE_X:
2033 strcat(arguments, reg_name);
2034 strcat(arguments, ".x");
2035 break;
2036
2037 case CHANNEL_SOURCE_Y:
2038 strcat(arguments, reg_name);
2039 strcat(arguments, ".y");
2040 break;
2041
2042 case CHANNEL_SOURCE_Z:
2043 strcat(arguments, reg_name);
2044 strcat(arguments, ".z");
2045 break;
2046
2047 case CHANNEL_SOURCE_W:
2048 strcat(arguments, reg_name);
2049 strcat(arguments, ".w");
2050 break;
2051
2052 default:
2053 FIXME("Unhandled channel source %#x\n", channel_source);
2054 strcat(arguments, "undefined");
2055 break;
2056 }
2057
2058 if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
2059}
2060
2061static void shader_glsl_color_correction(const struct wined3d_shader_instruction *ins, struct color_fixup_desc fixup)
2062{
2063 struct wined3d_shader_dst_param dst;
2064 unsigned int mask_size, remaining;
2065 glsl_dst_param_t dst_param;
2066 char arguments[256];
2067 DWORD mask;
2068
2069 mask = 0;
2070 if (fixup.x_sign_fixup || fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0;
2071 if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1;
2072 if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2;
2073 if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3;
2074 mask &= ins->dst[0].write_mask;
2075
2076 if (!mask) return; /* Nothing to do */
2077
2078 if (is_complex_fixup(fixup))
2079 {
2080 enum complex_fixup complex_fixup = get_complex_fixup(fixup);
2081 FIXME("Complex fixup (%#x) not supported\n",complex_fixup); (void)complex_fixup;
2082 return;
2083 }
2084
2085 mask_size = shader_glsl_get_write_mask_size(mask);
2086
2087 dst = ins->dst[0];
2088 dst.write_mask = mask;
2089 shader_glsl_add_dst_param(ins, &dst, &dst_param);
2090
2091 arguments[0] = '\0';
2092 remaining = mask_size;
2093 if (mask & WINED3DSP_WRITEMASK_0)
2094 {
2095 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.x_sign_fixup, fixup.x_source);
2096 if (--remaining) strcat(arguments, ", ");
2097 }
2098 if (mask & WINED3DSP_WRITEMASK_1)
2099 {
2100 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.y_sign_fixup, fixup.y_source);
2101 if (--remaining) strcat(arguments, ", ");
2102 }
2103 if (mask & WINED3DSP_WRITEMASK_2)
2104 {
2105 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.z_sign_fixup, fixup.z_source);
2106 if (--remaining) strcat(arguments, ", ");
2107 }
2108 if (mask & WINED3DSP_WRITEMASK_3)
2109 {
2110 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.w_sign_fixup, fixup.w_source);
2111 if (--remaining) strcat(arguments, ", ");
2112 }
2113
2114 if (mask_size > 1)
2115 {
2116 shader_addline(ins->ctx->buffer, "%s%s = vec%u(%s);\n",
2117 dst_param.reg_name, dst_param.mask_str, mask_size, arguments);
2118 }
2119 else
2120 {
2121 shader_addline(ins->ctx->buffer, "%s%s = %s;\n", dst_param.reg_name, dst_param.mask_str, arguments);
2122 }
2123}
2124
2125static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins,
2126 DWORD sampler, const glsl_sample_function_t *sample_function, DWORD swizzle,
2127 const char *dx, const char *dy,
2128 const char *bias, const char *coord_reg_fmt, ...)
2129{
2130 const char *sampler_base;
2131 char dst_swizzle[6];
2132 struct color_fixup_desc fixup;
2133 BOOL np2_fixup = FALSE;
2134 BOOL tmirror_tmp_reg = FALSE;
2135 va_list args;
2136
2137 shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
2138
2139 if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
2140 {
2141 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
2142 fixup = priv->cur_ps_args->color_fixup[sampler];
2143 sampler_base = "Psampler";
2144
2145 if (priv->cur_ps_args->np2_fixup & (1 << sampler)) {
2146 if(bias) {
2147 FIXME("Biased sampling from NP2 textures is unsupported\n");
2148 } else {
2149 np2_fixup = TRUE;
2150 }
2151 }
2152
2153 if (priv->cur_ps_args->t_mirror & (1 << sampler))
2154 {
2155 if (ins->ctx->reg_maps->sampler_type[sampler]==WINED3DSTT_2D)
2156 {
2157 if (sample_function->coord_mask & WINED3DSP_WRITEMASK_1)
2158 {
2159 glsl_src_param_t coord_param;
2160 shader_glsl_add_src_param(ins, &ins->src[0], sample_function->coord_mask, &coord_param);
2161
2162 if (ins->src[0].reg.type != WINED3DSPR_INPUT)
2163 {
2164 shader_addline(ins->ctx->buffer, "%s.y=1.0-%s.y;\n",
2165 coord_param.reg_name, coord_param.reg_name);
2166 }
2167 else
2168 {
2169 tmirror_tmp_reg = TRUE;
2170 shader_addline(ins->ctx->buffer, "tmp0.xy=vec2(%s.x, 1.0-%s.y).xy;\n",
2171 coord_param.reg_name, coord_param.reg_name);
2172 }
2173 }
2174 else
2175 {
2176 DebugBreak();
2177 FIXME("Unexpected coord_mask with t_mirror\n");
2178 }
2179 }
2180 }
2181 } else {
2182 sampler_base = "Vsampler";
2183 fixup = COLOR_FIXUP_IDENTITY; /* FIXME: Vshader color fixup */
2184 }
2185
2186 shader_glsl_append_dst(ins->ctx->buffer, ins);
2187
2188 shader_addline(ins->ctx->buffer, "%s(%s%u, ", sample_function->name, sampler_base, sampler);
2189
2190 if (tmirror_tmp_reg)
2191 {
2192 shader_addline(ins->ctx->buffer, "%s", "tmp0.xy");
2193 }
2194 else
2195 {
2196 va_start(args, coord_reg_fmt);
2197 shader_vaddline(ins->ctx->buffer, coord_reg_fmt, args);
2198 va_end(args);
2199 }
2200
2201 if(bias) {
2202 shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle);
2203 } else {
2204 if (np2_fixup) {
2205 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
2206 const unsigned char idx = priv->cur_np2fixup_info->idx[sampler];
2207
2208 shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup[%u].%s)%s);\n", idx >> 1,
2209 (idx % 2) ? "zw" : "xy", dst_swizzle);
2210 } else if(dx && dy) {
2211 shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle);
2212 } else {
2213 shader_addline(ins->ctx->buffer, ")%s);\n", dst_swizzle);
2214 }
2215 }
2216
2217 if(!is_identity_fixup(fixup)) {
2218 shader_glsl_color_correction(ins, fixup);
2219 }
2220}
2221
2222/*****************************************************************************
2223 * Begin processing individual instruction opcodes
2224 ****************************************************************************/
2225
2226/* Generate GLSL arithmetic functions (dst = src1 + src2) */
2227static void shader_glsl_arith(const struct wined3d_shader_instruction *ins)
2228{
2229 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2230 glsl_src_param_t src0_param;
2231 glsl_src_param_t src1_param;
2232 DWORD write_mask;
2233 char op;
2234
2235 /* Determine the GLSL operator to use based on the opcode */
2236 switch (ins->handler_idx)
2237 {
2238 case WINED3DSIH_MUL: op = '*'; break;
2239 case WINED3DSIH_ADD: op = '+'; break;
2240 case WINED3DSIH_SUB: op = '-'; break;
2241 default:
2242 op = ' ';
2243 FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
2244 break;
2245 }
2246
2247 write_mask = shader_glsl_append_dst(buffer, ins);
2248 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2249 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2250 shader_addline(buffer, "%s %c %s);\n", src0_param.param_str, op, src1_param.param_str);
2251}
2252
2253#ifdef VBOX_WITH_VMSVGA
2254static void shader_glsl_mov_impl(const struct wined3d_shader_instruction *ins, int p0_idx);
2255
2256/* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
2257static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
2258{
2259 if (ins->predicate)
2260 {
2261 int i;
2262 DWORD dst_mask = ins->dst[0].write_mask;
2263 struct wined3d_shader_dst_param *dst = (struct wined3d_shader_dst_param *)&ins->dst[0];
2264
2265 for (i = 0; i < 4; i++)
2266 {
2267 if (dst_mask & RT_BIT(i))
2268 {
2269 dst->write_mask = RT_BIT(i);
2270
2271 shader_glsl_mov_impl(ins, i);
2272 }
2273 }
2274 dst->write_mask = dst_mask;
2275 }
2276 else
2277 shader_glsl_mov_impl(ins, 0);
2278}
2279
2280/* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
2281static void shader_glsl_mov_impl(const struct wined3d_shader_instruction *ins, int p0_idx)
2282
2283#else
2284/* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
2285static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
2286#endif
2287{
2288 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
2289 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2290 glsl_src_param_t src0_param;
2291 DWORD write_mask;
2292
2293#ifdef VBOX_WITH_VMSVGA
2294 if (ins->predicate)
2295 {
2296 shader_addline(buffer, "if (p0[%d]) {\n", p0_idx);
2297 }
2298#endif
2299
2300 write_mask = shader_glsl_append_dst(buffer, ins);
2301 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2302
2303 /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
2304 * shader versions WINED3DSIO_MOVA is used for this. */
2305 if (ins->ctx->reg_maps->shader_version.major == 1
2306 && !shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)
2307 && ins->dst[0].reg.type == WINED3DSPR_ADDR)
2308 {
2309 /* This is a simple floor() */
2310 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2311 if (mask_size > 1) {
2312 shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str);
2313 } else {
2314 shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str);
2315 }
2316 }
2317 else if(ins->handler_idx == WINED3DSIH_MOVA)
2318 {
2319 /* We need to *round* to the nearest int here. */
2320 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2321
2322 if (gl_info->supported[EXT_GPU_SHADER4])
2323 {
2324 if (mask_size > 1)
2325 shader_addline(buffer, "ivec%d(round(%s)));\n", mask_size, src0_param.param_str);
2326 else
2327 shader_addline(buffer, "int(round(%s)));\n", src0_param.param_str);
2328 }
2329 else
2330 {
2331 if (mask_size > 1)
2332 shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n",
2333 mask_size, src0_param.param_str, mask_size, src0_param.param_str);
2334 else
2335 shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n",
2336 src0_param.param_str, src0_param.param_str);
2337 }
2338 }
2339 else
2340 {
2341 shader_addline(buffer, "%s);\n", src0_param.param_str);
2342 }
2343#ifdef VBOX_WITH_VMSVGA
2344 if (ins->predicate)
2345 {
2346 shader_addline(buffer, "}\n");
2347 }
2348#endif
2349}
2350
2351/* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */
2352static void shader_glsl_dot(const struct wined3d_shader_instruction *ins)
2353{
2354 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2355 glsl_src_param_t src0_param;
2356 glsl_src_param_t src1_param;
2357 DWORD dst_write_mask, src_write_mask;
2358 unsigned int dst_size = 0;
2359
2360 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2361 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2362
2363 /* dp3 works on vec3, dp4 on vec4 */
2364 if (ins->handler_idx == WINED3DSIH_DP4)
2365 {
2366 src_write_mask = WINED3DSP_WRITEMASK_ALL;
2367 } else {
2368 src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2369 }
2370
2371 shader_glsl_add_src_param(ins, &ins->src[0], src_write_mask, &src0_param);
2372 shader_glsl_add_src_param(ins, &ins->src[1], src_write_mask, &src1_param);
2373
2374 if (dst_size > 1) {
2375 shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
2376 } else {
2377 shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
2378 }
2379}
2380
2381/* Note that this instruction has some restrictions. The destination write mask
2382 * can't contain the w component, and the source swizzles have to be .xyzw */
2383static void shader_glsl_cross(const struct wined3d_shader_instruction *ins)
2384{
2385 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2386 glsl_src_param_t src0_param;
2387 glsl_src_param_t src1_param;
2388 char dst_mask[6];
2389
2390 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2391 shader_glsl_append_dst(ins->ctx->buffer, ins);
2392 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2393 shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
2394 shader_addline(ins->ctx->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
2395}
2396
2397/* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1)
2398 * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while
2399 * GLSL uses the value as-is. */
2400static void shader_glsl_pow(const struct wined3d_shader_instruction *ins)
2401{
2402 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2403 glsl_src_param_t src0_param;
2404 glsl_src_param_t src1_param;
2405 DWORD dst_write_mask;
2406 unsigned int dst_size;
2407
2408 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2409 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2410
2411 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2412 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
2413
2414 if (dst_size > 1) {
2415 shader_addline(buffer, "vec%d(pow(abs(%s), %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
2416 } else {
2417 shader_addline(buffer, "pow(abs(%s), %s));\n", src0_param.param_str, src1_param.param_str);
2418 }
2419}
2420
2421/* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|))
2422 * Src0 is a scalar. Note that D3D uses the absolute of src0, while
2423 * GLSL uses the value as-is. */
2424static void shader_glsl_log(const struct wined3d_shader_instruction *ins)
2425{
2426 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2427 glsl_src_param_t src0_param;
2428 DWORD dst_write_mask;
2429 unsigned int dst_size;
2430
2431 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2432 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2433
2434 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2435
2436 if (dst_size > 1)
2437 {
2438 shader_addline(buffer, "vec%d(%s == 0.0 ? -FLT_MAX : log2(abs(%s))));\n",
2439 dst_size, src0_param.param_str, src0_param.param_str);
2440 }
2441 else
2442 {
2443 shader_addline(buffer, "%s == 0.0 ? -FLT_MAX : log2(abs(%s)));\n",
2444 src0_param.param_str, src0_param.param_str);
2445 }
2446}
2447
2448/* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */
2449static void shader_glsl_map2gl(const struct wined3d_shader_instruction *ins)
2450{
2451 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2452 glsl_src_param_t src_param;
2453 const char *instruction;
2454 DWORD write_mask;
2455 unsigned i;
2456
2457 /* Determine the GLSL function to use based on the opcode */
2458 /* TODO: Possibly make this a table for faster lookups */
2459 switch (ins->handler_idx)
2460 {
2461 case WINED3DSIH_MIN: instruction = "min"; break;
2462 case WINED3DSIH_MAX: instruction = "max"; break;
2463 case WINED3DSIH_ABS: instruction = "abs"; break;
2464 case WINED3DSIH_FRC: instruction = "fract"; break;
2465 case WINED3DSIH_EXP: instruction = "exp2"; break;
2466 case WINED3DSIH_DSX: instruction = "dFdx"; break;
2467 case WINED3DSIH_DSY: instruction = "ycorrection.y * dFdy"; break;
2468 default: instruction = "";
2469 FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
2470 break;
2471 }
2472
2473 write_mask = shader_glsl_append_dst(buffer, ins);
2474
2475 shader_addline(buffer, "%s(", instruction);
2476
2477 if (ins->src_count)
2478 {
2479 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2480 shader_addline(buffer, "%s", src_param.param_str);
2481 for (i = 1; i < ins->src_count; ++i)
2482 {
2483 shader_glsl_add_src_param(ins, &ins->src[i], write_mask, &src_param);
2484 shader_addline(buffer, ", %s", src_param.param_str);
2485 }
2486 }
2487
2488 shader_addline(buffer, "));\n");
2489}
2490
2491static void shader_glsl_nrm(const struct wined3d_shader_instruction *ins)
2492{
2493 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2494 glsl_src_param_t src_param;
2495 unsigned int mask_size;
2496 DWORD write_mask;
2497 char dst_mask[6];
2498
2499 write_mask = shader_glsl_get_write_mask(ins->dst, dst_mask);
2500 mask_size = shader_glsl_get_write_mask_size(write_mask);
2501 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2502
2503 shader_addline(buffer, "tmp0.x = length(%s);\n", src_param.param_str);
2504 shader_glsl_append_dst(buffer, ins);
2505 if (mask_size > 1)
2506 {
2507 shader_addline(buffer, "tmp0.x == 0.0 ? vec%u(0.0) : (%s / tmp0.x));\n",
2508 mask_size, src_param.param_str);
2509 }
2510 else
2511 {
2512 shader_addline(buffer, "tmp0.x == 0.0 ? 0.0 : (%s / tmp0.x));\n",
2513 src_param.param_str);
2514 }
2515}
2516
2517/** Process the WINED3DSIO_EXPP instruction in GLSL:
2518 * For shader model 1.x, do the following (and honor the writemask, so use a temporary variable):
2519 * dst.x = 2^(floor(src))
2520 * dst.y = src - floor(src)
2521 * dst.z = 2^src (partial precision is allowed, but optional)
2522 * dst.w = 1.0;
2523 * For 2.0 shaders, just do this (honoring writemask and swizzle):
2524 * dst = 2^src; (partial precision is allowed, but optional)
2525 */
2526static void shader_glsl_expp(const struct wined3d_shader_instruction *ins)
2527{
2528 glsl_src_param_t src_param;
2529
2530 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src_param);
2531
2532 if (ins->ctx->reg_maps->shader_version.major < 2)
2533 {
2534 char dst_mask[6];
2535
2536 shader_addline(ins->ctx->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str);
2537 shader_addline(ins->ctx->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str);
2538 shader_addline(ins->ctx->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str);
2539 shader_addline(ins->ctx->buffer, "tmp0.w = 1.0;\n");
2540
2541 shader_glsl_append_dst(ins->ctx->buffer, ins);
2542 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2543 shader_addline(ins->ctx->buffer, "tmp0%s);\n", dst_mask);
2544 } else {
2545 DWORD write_mask;
2546 unsigned int mask_size;
2547
2548 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2549 mask_size = shader_glsl_get_write_mask_size(write_mask);
2550
2551 if (mask_size > 1) {
2552 shader_addline(ins->ctx->buffer, "vec%d(exp2(%s)));\n", mask_size, src_param.param_str);
2553 } else {
2554 shader_addline(ins->ctx->buffer, "exp2(%s));\n", src_param.param_str);
2555 }
2556 }
2557}
2558
2559/** Process the RCP (reciprocal or inverse) opcode in GLSL (dst = 1 / src) */
2560static void shader_glsl_rcp(const struct wined3d_shader_instruction *ins)
2561{
2562 glsl_src_param_t src_param;
2563 DWORD write_mask;
2564 unsigned int mask_size;
2565
2566 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2567 mask_size = shader_glsl_get_write_mask_size(write_mask);
2568 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2569
2570 if (mask_size > 1)
2571 {
2572 shader_addline(ins->ctx->buffer, "vec%d(%s == 0.0 ? FLT_MAX : 1.0 / %s));\n",
2573 mask_size, src_param.param_str, src_param.param_str);
2574 }
2575 else
2576 {
2577 shader_addline(ins->ctx->buffer, "%s == 0.0 ? FLT_MAX : 1.0 / %s);\n",
2578 src_param.param_str, src_param.param_str);
2579 }
2580}
2581
2582static void shader_glsl_rsq(const struct wined3d_shader_instruction *ins)
2583{
2584 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2585 glsl_src_param_t src_param;
2586 DWORD write_mask;
2587 unsigned int mask_size;
2588
2589 write_mask = shader_glsl_append_dst(buffer, ins);
2590 mask_size = shader_glsl_get_write_mask_size(write_mask);
2591
2592 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2593
2594 if (mask_size > 1)
2595 {
2596 shader_addline(buffer, "vec%d(%s == 0.0 ? FLT_MAX : inversesqrt(abs(%s))));\n",
2597 mask_size, src_param.param_str, src_param.param_str);
2598 }
2599 else
2600 {
2601 shader_addline(buffer, "%s == 0.0 ? FLT_MAX : inversesqrt(abs(%s)));\n",
2602 src_param.param_str, src_param.param_str);
2603 }
2604}
2605
#ifdef VBOX_WITH_VMSVGA
/* Emit the SETP instruction: for every enabled destination component a
 * separate statement "dst[n] = (src0 <op> src1);" is produced, where <op>
 * is the comparison selected by the instruction flags. */
static void shader_glsl_setp(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
    struct wined3d_shader_dst_param channel_dst = ins->dst[0];
    const DWORD full_mask = ins->dst[0].write_mask;
    glsl_src_param_t lhs, rhs;
    DWORD channel_mask;
    int channel;

    /* One comparison per enabled destination channel. */
    for (channel = 0; channel < 4; channel++)
    {
        if (!(full_mask & RT_BIT(channel)))
            continue;

        /* Narrow the copied destination to the current channel only. */
        channel_mask = WINED3DSP_WRITEMASK_0 << channel;
        channel_dst.write_mask = full_mask & channel_mask;

        channel_mask = shader_glsl_append_dst_ext(buffer, ins, &channel_dst);
        Assert(channel_mask);

        shader_glsl_add_src_param(ins, &ins->src[0], channel_mask, &lhs);
        shader_glsl_add_src_param(ins, &ins->src[1], channel_mask, &rhs);

        shader_addline(buffer, "%s %s %s);\n",
                lhs.param_str, shader_get_comp_op(ins->flags), rhs.param_str);
    }
}
#endif
2636
2637/** Process signed comparison opcodes in GLSL. */
2638static void shader_glsl_compare(const struct wined3d_shader_instruction *ins)
2639{
2640 glsl_src_param_t src0_param;
2641 glsl_src_param_t src1_param;
2642 DWORD write_mask;
2643 unsigned int mask_size;
2644
2645 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2646 mask_size = shader_glsl_get_write_mask_size(write_mask);
2647 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2648 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2649
2650 if (mask_size > 1) {
2651 const char *compare;
2652
2653 switch(ins->handler_idx)
2654 {
2655 case WINED3DSIH_SLT: compare = "lessThan"; break;
2656 case WINED3DSIH_SGE: compare = "greaterThanEqual"; break;
2657 default: compare = "";
2658 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2659 }
2660
2661 shader_addline(ins->ctx->buffer, "vec%d(%s(%s, %s)));\n", mask_size, compare,
2662 src0_param.param_str, src1_param.param_str);
2663 } else {
2664 switch(ins->handler_idx)
2665 {
2666 case WINED3DSIH_SLT:
2667 /* Step(src0, src1) is not suitable here because if src0 == src1 SLT is supposed,
2668 * to return 0.0 but step returns 1.0 because step is not < x
2669 * An alternative is a bvec compare padded with an unused second component.
2670 * step(src1 * -1.0, src0 * -1.0) is not an option because it suffers from the same
2671 * issue. Playing with not() is not possible either because not() does not accept
2672 * a scalar.
2673 */
2674 shader_addline(ins->ctx->buffer, "(%s < %s) ? 1.0 : 0.0);\n",
2675 src0_param.param_str, src1_param.param_str);
2676 break;
2677 case WINED3DSIH_SGE:
2678 /* Here we can use the step() function and safe a conditional */
2679 shader_addline(ins->ctx->buffer, "step(%s, %s));\n", src1_param.param_str, src0_param.param_str);
2680 break;
2681 default:
2682 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2683 }
2684
2685 }
2686}
2687
2688/** Process CMP instruction in GLSL (dst = src0 >= 0.0 ? src1 : src2), per channel */
2689static void shader_glsl_cmp(const struct wined3d_shader_instruction *ins)
2690{
2691 glsl_src_param_t src0_param;
2692 glsl_src_param_t src1_param;
2693 glsl_src_param_t src2_param;
2694 DWORD write_mask, cmp_channel = 0;
2695 unsigned int i, j;
2696 char mask_char[6];
2697 BOOL temp_destination = FALSE;
2698
2699 if (shader_is_scalar(&ins->src[0].reg))
2700 {
2701 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2702
2703 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
2704 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2705 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2706
2707 shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
2708 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2709 } else {
2710 DWORD dst_mask = ins->dst[0].write_mask;
2711 struct wined3d_shader_dst_param dst = ins->dst[0];
2712
2713 /* Cycle through all source0 channels */
2714 for (i=0; i<4; i++) {
2715 write_mask = 0;
2716 /* Find the destination channels which use the current source0 channel */
2717 for (j=0; j<4; j++) {
2718 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
2719 {
2720 write_mask |= WINED3DSP_WRITEMASK_0 << j;
2721 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
2722 }
2723 }
2724 dst.write_mask = dst_mask & write_mask;
2725
2726 /* Splitting the cmp instruction up in multiple lines imposes a problem:
2727 * The first lines may overwrite source parameters of the following lines.
2728 * Deal with that by using a temporary destination register if needed
2729 */
2730 if ((ins->src[0].reg.idx == ins->dst[0].reg.idx
2731 && ins->src[0].reg.type == ins->dst[0].reg.type)
2732 || (ins->src[1].reg.idx == ins->dst[0].reg.idx
2733 && ins->src[1].reg.type == ins->dst[0].reg.type)
2734 || (ins->src[2].reg.idx == ins->dst[0].reg.idx
2735 && ins->src[2].reg.type == ins->dst[0].reg.type))
2736 {
2737 write_mask = shader_glsl_get_write_mask(&dst, mask_char);
2738 if (!write_mask) continue;
2739 shader_addline(ins->ctx->buffer, "tmp0%s = (", mask_char);
2740 temp_destination = TRUE;
2741 } else {
2742 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
2743 if (!write_mask) continue;
2744 }
2745
2746 shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
2747 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2748 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2749
2750 shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
2751 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2752 }
2753
2754 if(temp_destination) {
2755 shader_glsl_get_write_mask(&ins->dst[0], mask_char);
2756 shader_glsl_append_dst(ins->ctx->buffer, ins);
2757 shader_addline(ins->ctx->buffer, "tmp0%s);\n", mask_char);
2758 }
2759 }
2760
2761}
2762
2763/** Process the CND opcode in GLSL (dst = (src0 > 0.5) ? src1 : src2) */
2764/* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
2765 * the compare is done per component of src0. */
2766static void shader_glsl_cnd(const struct wined3d_shader_instruction *ins)
2767{
2768 struct wined3d_shader_dst_param dst;
2769 glsl_src_param_t src0_param;
2770 glsl_src_param_t src1_param;
2771 glsl_src_param_t src2_param;
2772 DWORD write_mask, cmp_channel = 0;
2773 unsigned int i, j;
2774 DWORD dst_mask;
2775 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
2776 ins->ctx->reg_maps->shader_version.minor);
2777
2778 if (shader_version < WINED3D_SHADER_VERSION(1, 4))
2779 {
2780 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2781 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2782 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2783 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2784
2785 /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */
2786 if (ins->coissue)
2787 {
2788 shader_addline(ins->ctx->buffer, "%s /* COISSUE! */);\n", src1_param.param_str);
2789 } else {
2790 shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2791 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2792 }
2793 return;
2794 }
2795 /* Cycle through all source0 channels */
2796 dst_mask = ins->dst[0].write_mask;
2797 dst = ins->dst[0];
2798 for (i=0; i<4; i++) {
2799 write_mask = 0;
2800 /* Find the destination channels which use the current source0 channel */
2801 for (j=0; j<4; j++) {
2802 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
2803 {
2804 write_mask |= WINED3DSP_WRITEMASK_0 << j;
2805 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
2806 }
2807 }
2808
2809 dst.write_mask = dst_mask & write_mask;
2810 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
2811 if (!write_mask) continue;
2812
2813 shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
2814 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2815 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2816
2817 shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2818 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2819 }
2820}
2821
2822/** GLSL code generation for WINED3DSIO_MAD: Multiply the first 2 opcodes, then add the last */
2823static void shader_glsl_mad(const struct wined3d_shader_instruction *ins)
2824{
2825 glsl_src_param_t src0_param;
2826 glsl_src_param_t src1_param;
2827 glsl_src_param_t src2_param;
2828 DWORD write_mask;
2829
2830 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2831 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2832 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2833 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2834 shader_addline(ins->ctx->buffer, "(%s * %s) + %s);\n",
2835 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2836}
2837
2838/* Handles transforming all WINED3DSIO_M?x? opcodes for
2839 Vertex shaders to GLSL codes */
2840static void shader_glsl_mnxn(const struct wined3d_shader_instruction *ins)
2841{
2842 int i;
2843 int nComponents = 0;
2844 struct wined3d_shader_dst_param tmp_dst = {{0}};
2845 struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
2846 struct wined3d_shader_instruction tmp_ins;
2847
2848 memset(&tmp_ins, 0, sizeof(tmp_ins));
2849
2850 /* Set constants for the temporary argument */
2851 tmp_ins.ctx = ins->ctx;
2852 tmp_ins.dst_count = 1;
2853 tmp_ins.dst = &tmp_dst;
2854 tmp_ins.src_count = 2;
2855 tmp_ins.src = tmp_src;
2856
2857 switch(ins->handler_idx)
2858 {
2859 case WINED3DSIH_M4x4:
2860 nComponents = 4;
2861 tmp_ins.handler_idx = WINED3DSIH_DP4;
2862 break;
2863 case WINED3DSIH_M4x3:
2864 nComponents = 3;
2865 tmp_ins.handler_idx = WINED3DSIH_DP4;
2866 break;
2867 case WINED3DSIH_M3x4:
2868 nComponents = 4;
2869 tmp_ins.handler_idx = WINED3DSIH_DP3;
2870 break;
2871 case WINED3DSIH_M3x3:
2872 nComponents = 3;
2873 tmp_ins.handler_idx = WINED3DSIH_DP3;
2874 break;
2875 case WINED3DSIH_M3x2:
2876 nComponents = 2;
2877 tmp_ins.handler_idx = WINED3DSIH_DP3;
2878 break;
2879 default:
2880 break;
2881 }
2882
2883 tmp_dst = ins->dst[0];
2884 tmp_src[0] = ins->src[0];
2885 tmp_src[1] = ins->src[1];
2886 for (i = 0; i < nComponents; ++i)
2887 {
2888 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
2889 shader_glsl_dot(&tmp_ins);
2890 ++tmp_src[1].reg.idx;
2891 }
2892}
2893
2894/**
2895 The LRP instruction performs a component-wise linear interpolation
2896 between the second and third operands using the first operand as the
2897 blend factor. Equation: (dst = src2 + src0 * (src1 - src2))
2898 This is equivalent to mix(src2, src1, src0);
2899*/
2900static void shader_glsl_lrp(const struct wined3d_shader_instruction *ins)
2901{
2902 glsl_src_param_t src0_param;
2903 glsl_src_param_t src1_param;
2904 glsl_src_param_t src2_param;
2905 DWORD write_mask;
2906
2907 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2908
2909 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2910 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2911 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2912
2913 shader_addline(ins->ctx->buffer, "mix(%s, %s, %s));\n",
2914 src2_param.param_str, src1_param.param_str, src0_param.param_str);
2915}
2916
2917/** Process the WINED3DSIO_LIT instruction in GLSL:
2918 * dst.x = dst.w = 1.0
2919 * dst.y = (src0.x > 0) ? src0.x
2920 * dst.z = (src0.x > 0) ? ((src0.y > 0) ? pow(src0.y, src.w) : 0) : 0
2921 * where src.w is clamped at +- 128
2922 */
2923static void shader_glsl_lit(const struct wined3d_shader_instruction *ins)
2924{
2925 glsl_src_param_t src0_param;
2926 glsl_src_param_t src1_param;
2927 glsl_src_param_t src3_param;
2928 char dst_mask[6];
2929
2930 shader_glsl_append_dst(ins->ctx->buffer, ins);
2931 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2932
2933 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2934 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src1_param);
2935 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src3_param);
2936
2937 /* The sdk specifies the instruction like this
2938 * dst.x = 1.0;
2939 * if(src.x > 0.0) dst.y = src.x
2940 * else dst.y = 0.0.
2941 * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power);
2942 * else dst.z = 0.0;
2943 * dst.w = 1.0;
2944 *
2945 * Obviously that has quite a few conditionals in it which we don't like. So the first step is this:
2946 * dst.x = 1.0 ... No further explanation needed
2947 * dst.y = max(src.y, 0.0); ... If x < 0.0, use 0.0, otherwise x. Same as the conditional
2948 * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0; ... 0 ^ power is 0, and otherwise we use y anyway
2949 * dst.w = 1.0. ... Nothing fancy.
2950 *
2951 * So we still have one conditional in there. So do this:
2952 * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power);
2953 *
2954 * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power),
2955 * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too.
2956 * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to
2957 */
2958 shader_addline(ins->ctx->buffer,
2959 "vec4(1.0, max(%s, 0.0), pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n",
2960 src0_param.param_str, src1_param.param_str, src0_param.param_str, src3_param.param_str, dst_mask);
2961}
2962
2963/** Process the WINED3DSIO_DST instruction in GLSL:
2964 * dst.x = 1.0
2965 * dst.y = src0.x * src0.y
2966 * dst.z = src0.z
2967 * dst.w = src1.w
2968 */
2969static void shader_glsl_dst(const struct wined3d_shader_instruction *ins)
2970{
2971 glsl_src_param_t src0y_param;
2972 glsl_src_param_t src0z_param;
2973 glsl_src_param_t src1y_param;
2974 glsl_src_param_t src1w_param;
2975 char dst_mask[6];
2976
2977 shader_glsl_append_dst(ins->ctx->buffer, ins);
2978 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2979
2980 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src0y_param);
2981 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &src0z_param);
2982 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_1, &src1y_param);
2983 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_3, &src1w_param);
2984
2985 shader_addline(ins->ctx->buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n",
2986 src0y_param.param_str, src1y_param.param_str, src0z_param.param_str, src1w_param.param_str, dst_mask);
2987}
2988
2989/** Process the WINED3DSIO_SINCOS instruction in GLSL:
2990 * VS 2.0 requires that specific cosine and sine constants be passed to this instruction so the hardware
2991 * can handle it. But, these functions are built-in for GLSL, so we can just ignore the last 2 params.
2992 *
2993 * dst.x = cos(src0.?)
2994 * dst.y = sin(src0.?)
2995 * dst.z = dst.z
2996 * dst.w = dst.w
2997 */
2998static void shader_glsl_sincos(const struct wined3d_shader_instruction *ins)
2999{
3000 glsl_src_param_t src0_param;
3001 DWORD write_mask;
3002
3003 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3004 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3005
3006 switch (write_mask) {
3007 case WINED3DSP_WRITEMASK_0:
3008 shader_addline(ins->ctx->buffer, "cos(%s));\n", src0_param.param_str);
3009 break;
3010
3011 case WINED3DSP_WRITEMASK_1:
3012 shader_addline(ins->ctx->buffer, "sin(%s));\n", src0_param.param_str);
3013 break;
3014
3015 case (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1):
3016 shader_addline(ins->ctx->buffer, "vec2(cos(%s), sin(%s)));\n", src0_param.param_str, src0_param.param_str);
3017 break;
3018
3019 default:
3020 ERR("Write mask should be .x, .y or .xy\n");
3021 break;
3022 }
3023}
3024
3025/* sgn in vs_2_0 has 2 extra parameters(registers for temporary storage) which we don't use
3026 * here. But those extra parameters require a dedicated function for sgn, since map2gl would
3027 * generate invalid code
3028 */
3029static void shader_glsl_sgn(const struct wined3d_shader_instruction *ins)
3030{
3031 glsl_src_param_t src0_param;
3032 DWORD write_mask;
3033
3034 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3035 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3036
3037 shader_addline(ins->ctx->buffer, "sign(%s));\n", src0_param.param_str);
3038}
3039
3040/** Process the WINED3DSIO_LOOP instruction in GLSL:
3041 * Start a for() loop where src1.y is the initial value of aL,
3042 * increment aL by src1.z for a total of src1.x iterations.
3043 * Need to use a temporary variable for this operation.
3044 */
3045/* FIXME: I don't think nested loops will work correctly this way. */
3046static void shader_glsl_loop(const struct wined3d_shader_instruction *ins)
3047{
3048 glsl_src_param_t src1_param;
3049 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3050 const DWORD *control_values = NULL;
3051 const local_constant *constant;
3052
3053 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
3054
3055 /* Try to hardcode the loop control parameters if possible. Direct3D 9 class hardware doesn't support real
3056 * varying indexing, but Microsoft designed this feature for Shader model 2.x+. If the loop control is
3057 * known at compile time, the GLSL compiler can unroll the loop, and replace indirect addressing with direct
3058 * addressing.
3059 */
3060 if (ins->src[1].reg.type == WINED3DSPR_CONSTINT)
3061 {
3062 LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry) {
3063 if (constant->idx == ins->src[1].reg.idx)
3064 {
3065 control_values = constant->value;
3066 break;
3067 }
3068 }
3069 }
3070
3071 if (control_values)
3072 {
3073 struct wined3d_shader_loop_control loop_control;
3074 loop_control.count = control_values[0];
3075 loop_control.start = control_values[1];
3076 loop_control.step = (int)control_values[2];
3077
3078 if (loop_control.step > 0)
3079 {
3080 shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u < (%u * %d + %u); aL%u += %d) {\n",
3081 shader->baseShader.cur_loop_depth, loop_control.start,
3082 shader->baseShader.cur_loop_depth, loop_control.count, loop_control.step, loop_control.start,
3083 shader->baseShader.cur_loop_depth, loop_control.step);
3084 }
3085 else if (loop_control.step < 0)
3086 {
3087 shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u > (%u * %d + %u); aL%u += %d) {\n",
3088 shader->baseShader.cur_loop_depth, loop_control.start,
3089 shader->baseShader.cur_loop_depth, loop_control.count, loop_control.step, loop_control.start,
3090 shader->baseShader.cur_loop_depth, loop_control.step);
3091 }
3092 else
3093 {
3094 shader_addline(ins->ctx->buffer, "for (aL%u = %u, tmpInt%u = 0; tmpInt%u < %u; tmpInt%u++) {\n",
3095 shader->baseShader.cur_loop_depth, loop_control.start, shader->baseShader.cur_loop_depth,
3096 shader->baseShader.cur_loop_depth, loop_control.count,
3097 shader->baseShader.cur_loop_depth);
3098 }
3099 } else {
3100 shader_addline(ins->ctx->buffer,
3101 "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z) {\n",
3102 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno,
3103 src1_param.reg_name, shader->baseShader.cur_loop_depth, src1_param.reg_name,
3104 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno, src1_param.reg_name);
3105 }
3106
3107 shader->baseShader.cur_loop_depth++;
3108 shader->baseShader.cur_loop_regno++;
3109}
3110
3111static void shader_glsl_end(const struct wined3d_shader_instruction *ins)
3112{
3113 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3114
3115 shader_addline(ins->ctx->buffer, "}\n");
3116
3117 if (ins->handler_idx == WINED3DSIH_ENDLOOP)
3118 {
3119 shader->baseShader.cur_loop_depth--;
3120 shader->baseShader.cur_loop_regno--;
3121 }
3122
3123 if (ins->handler_idx == WINED3DSIH_ENDREP)
3124 {
3125 shader->baseShader.cur_loop_depth--;
3126 }
3127}
3128
3129static void shader_glsl_rep(const struct wined3d_shader_instruction *ins)
3130{
3131 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3132 glsl_src_param_t src0_param;
3133 const DWORD *control_values = NULL;
3134 const local_constant *constant;
3135
3136 /* Try to hardcode local values to help the GLSL compiler to unroll and optimize the loop */
3137 if (ins->src[0].reg.type == WINED3DSPR_CONSTINT)
3138 {
3139 LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry)
3140 {
3141 if (constant->idx == ins->src[0].reg.idx)
3142 {
3143 control_values = constant->value;
3144 break;
3145 }
3146 }
3147 }
3148
3149 if(control_values) {
3150 shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %d; tmpInt%d++) {\n",
3151 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
3152 control_values[0], shader->baseShader.cur_loop_depth);
3153 } else {
3154 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3155 shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n",
3156 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
3157 src0_param.param_str, shader->baseShader.cur_loop_depth);
3158 }
3159 shader->baseShader.cur_loop_depth++;
3160}
3161
3162static void shader_glsl_if(const struct wined3d_shader_instruction *ins)
3163{
3164 glsl_src_param_t src0_param;
3165
3166 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3167 shader_addline(ins->ctx->buffer, "if (%s) {\n", src0_param.param_str);
3168}
3169
3170static void shader_glsl_ifc(const struct wined3d_shader_instruction *ins)
3171{
3172 glsl_src_param_t src0_param;
3173 glsl_src_param_t src1_param;
3174
3175 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3176 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3177
3178 shader_addline(ins->ctx->buffer, "if (%s %s %s) {\n",
3179 src0_param.param_str, shader_get_comp_op(ins->flags), src1_param.param_str);
3180}
3181
3182static void shader_glsl_else(const struct wined3d_shader_instruction *ins)
3183{
3184 shader_addline(ins->ctx->buffer, "} else {\n");
3185}
3186
3187static void shader_glsl_break(const struct wined3d_shader_instruction *ins)
3188{
3189 shader_addline(ins->ctx->buffer, "break;\n");
3190}
3191
3192/* FIXME: According to MSDN the compare is done per component. */
3193static void shader_glsl_breakc(const struct wined3d_shader_instruction *ins)
3194{
3195 glsl_src_param_t src0_param;
3196 glsl_src_param_t src1_param;
3197
3198 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3199 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3200
3201 shader_addline(ins->ctx->buffer, "if (%s %s %s) break;\n",
3202 src0_param.param_str, shader_get_comp_op(ins->flags), src1_param.param_str);
3203}
3204
3205static void shader_glsl_label(const struct wined3d_shader_instruction *ins)
3206{
3207 shader_addline(ins->ctx->buffer, "}\n");
3208 shader_addline(ins->ctx->buffer, "void subroutine%u () {\n", ins->src[0].reg.idx);
3209}
3210
3211static void shader_glsl_call(const struct wined3d_shader_instruction *ins)
3212{
3213 shader_addline(ins->ctx->buffer, "subroutine%u();\n", ins->src[0].reg.idx);
3214}
3215
3216static void shader_glsl_callnz(const struct wined3d_shader_instruction *ins)
3217{
3218 glsl_src_param_t src1_param;
3219
3220 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3221 shader_addline(ins->ctx->buffer, "if (%s) subroutine%u();\n", src1_param.param_str, ins->src[0].reg.idx);
3222}
3223
/**
 * Handler for RET. Intentionally emits nothing: the closing brace of a
 * function is written when the next function is started and at the end of the
 * shader. The handler exists only to suppress the unhandled instruction
 * warning.
 */
static void shader_glsl_ret(const struct wined3d_shader_instruction *ins)
{
    (void)ins;
}
3230
3231/*********************************************
3232 * Pixel Shader Specific Code begins here
3233 ********************************************/
3234static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
3235{
3236 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3237 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device;
3238 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
3239 ins->ctx->reg_maps->shader_version.minor);
3240 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3241 glsl_sample_function_t sample_function;
3242 DWORD sample_flags = 0;
3243 WINED3DSAMPLER_TEXTURE_TYPE sampler_type;
3244 DWORD sampler_idx;
3245 DWORD mask = 0, swizzle;
3246
3247 /* 1.0-1.4: Use destination register as sampler source.
3248 * 2.0+: Use provided sampler source. */
3249 if (shader_version < WINED3D_SHADER_VERSION(2,0)) sampler_idx = ins->dst[0].reg.idx;
3250 else sampler_idx = ins->src[1].reg.idx;
3251
3252 AssertReturnVoid(sampler_idx < RT_ELEMENTS(ins->ctx->reg_maps->sampler_type));
3253 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3254
3255 if (shader_version < WINED3D_SHADER_VERSION(1,4))
3256 {
3257 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3258 DWORD flags = (priv->cur_ps_args->tex_transform >> (sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT))
3259 & WINED3D_PSARGS_TEXTRANSFORM_MASK;
3260
3261 /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. */
3262 if (flags & WINED3D_PSARGS_PROJECTED && sampler_type != WINED3DSTT_CUBE) {
3263 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3264 switch (flags & ~WINED3D_PSARGS_PROJECTED) {
3265 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
3266 case WINED3DTTFF_COUNT2: mask = WINED3DSP_WRITEMASK_1; break;
3267 case WINED3DTTFF_COUNT3: mask = WINED3DSP_WRITEMASK_2; break;
3268 case WINED3DTTFF_COUNT4:
3269 case WINED3DTTFF_DISABLE: mask = WINED3DSP_WRITEMASK_3; break;
3270 }
3271 }
3272 }
3273 else if (shader_version < WINED3D_SHADER_VERSION(2,0))
3274 {
3275 DWORD src_mod = ins->src[0].modifiers;
3276
3277 if (src_mod == WINED3DSPSM_DZ) {
3278 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3279 mask = WINED3DSP_WRITEMASK_2;
3280 } else if (src_mod == WINED3DSPSM_DW) {
3281 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3282 mask = WINED3DSP_WRITEMASK_3;
3283 }
3284 } else {
3285 if (ins->flags & WINED3DSI_TEXLD_PROJECT)
3286 {
3287 /* ps 2.0 texldp instruction always divides by the fourth component. */
3288 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3289 mask = WINED3DSP_WRITEMASK_3;
3290 }
3291 }
3292
3293 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3294 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3295 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3296 }
3297
3298 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3299 mask |= sample_function.coord_mask;
3300
3301 if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE;
3302 else swizzle = ins->src[1].swizzle;
3303
3304 /* 1.0-1.3: Use destination register as coordinate source.
3305 1.4+: Use provided coordinate source register. */
3306 if (shader_version < WINED3D_SHADER_VERSION(1,4))
3307 {
3308 char coord_mask[6];
3309 shader_glsl_write_mask_to_str(mask, coord_mask);
3310 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
3311 "T%u%s", sampler_idx, coord_mask);
3312 } else {
3313 glsl_src_param_t coord_param;
3314 shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param);
3315 if (ins->flags & WINED3DSI_TEXLD_BIAS)
3316 {
3317 glsl_src_param_t bias;
3318 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias);
3319 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, bias.param_str,
3320 "%s", coord_param.param_str);
3321 } else {
3322 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
3323 "%s", coord_param.param_str);
3324 }
3325 }
3326}
3327
3328static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins)
3329{
3330 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3331 IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
3332 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3333 glsl_sample_function_t sample_function;
3334 glsl_src_param_t coord_param, dx_param, dy_param;
3335 DWORD sample_flags = WINED3D_GLSL_SAMPLE_GRAD;
3336 DWORD sampler_type;
3337 DWORD sampler_idx;
3338 DWORD swizzle = ins->src[1].swizzle;
3339
3340 if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4])
3341 {
3342 FIXME("texldd used, but not supported by hardware. Falling back to regular tex\n");
3343 shader_glsl_tex(ins);
3344 return;
3345 }
3346
3347 sampler_idx = ins->src[1].reg.idx;
3348 AssertReturnVoid(sampler_idx < RT_ELEMENTS(ins->ctx->reg_maps->sampler_type));
3349
3350 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3351 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3352 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3353 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3354 }
3355
3356 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3357 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
3358 shader_glsl_add_src_param(ins, &ins->src[2], sample_function.coord_mask, &dx_param);
3359 shader_glsl_add_src_param(ins, &ins->src[3], sample_function.coord_mask, &dy_param);
3360
3361 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, NULL,
3362 "%s", coord_param.param_str);
3363}
3364
3365static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins)
3366{
3367 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3368 IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
3369 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3370 glsl_sample_function_t sample_function;
3371 glsl_src_param_t coord_param, lod_param;
3372 DWORD sample_flags = WINED3D_GLSL_SAMPLE_LOD;
3373 DWORD sampler_type;
3374 DWORD sampler_idx;
3375 DWORD swizzle = ins->src[1].swizzle;
3376
3377 sampler_idx = ins->src[1].reg.idx;
3378 AssertReturnVoid(sampler_idx < RT_ELEMENTS(ins->ctx->reg_maps->sampler_type));
3379
3380 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3381 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3382 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3383 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3384 }
3385 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3386 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
3387
3388 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param);
3389
3390 if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4]
3391 && shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
3392 {
3393 /* The GLSL spec claims the Lod sampling functions are only supported in vertex shaders.
3394 * However, they seem to work just fine in fragment shaders as well. */
3395 WARN("Using %s in fragment shader.\n", sample_function.name);
3396 }
3397 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str,
3398 "%s", coord_param.param_str);
3399}
3400
3401static void shader_glsl_texcoord(const struct wined3d_shader_instruction *ins)
3402{
3403 /* FIXME: Make this work for more than just 2D textures */
3404 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3405 DWORD write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3406
3407 if (!(ins->ctx->reg_maps->shader_version.major == 1 && ins->ctx->reg_maps->shader_version.minor == 4))
3408 {
3409 char dst_mask[6];
3410
3411 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3412 shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n",
3413 ins->dst[0].reg.idx, dst_mask);
3414 } else {
3415 DWORD reg = ins->src[0].reg.idx;
3416 DWORD src_mod = ins->src[0].modifiers;
3417 char dst_swizzle[6];
3418
3419 shader_glsl_get_swizzle(&ins->src[0], FALSE, write_mask, dst_swizzle);
3420
3421 if (src_mod == WINED3DSPSM_DZ) {
3422 glsl_src_param_t div_param;
3423 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3424 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &div_param);
3425
3426 if (mask_size > 1) {
3427 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
3428 } else {
3429 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
3430 }
3431 } else if (src_mod == WINED3DSPSM_DW) {
3432 glsl_src_param_t div_param;
3433 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3434 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &div_param);
3435
3436 if (mask_size > 1) {
3437 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
3438 } else {
3439 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
3440 }
3441 } else {
3442 shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
3443 }
3444 }
3445}
3446
3447/** Process the WINED3DSIO_TEXDP3TEX instruction in GLSL:
3448 * Take a 3-component dot product of the TexCoord[dstreg] and src,
3449 * then perform a 1D texture lookup from stage dstregnum, place into dst. */
3450static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins)
3451{
3452 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3453 glsl_src_param_t src0_param;
3454 glsl_sample_function_t sample_function;
3455 DWORD sampler_idx = ins->dst[0].reg.idx;
3456 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3457 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3458 UINT mask_size;
3459
3460 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3461
3462 /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one
3463 * scalar, and projected sampling would require 4.
3464 *
3465 * It is a dependent read - not valid with conditional NP2 textures
3466 */
3467 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3468 mask_size = shader_glsl_get_write_mask_size(sample_function.coord_mask);
3469
3470 switch(mask_size)
3471 {
3472 case 1:
3473 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3474 "dot(gl_TexCoord[%u].xyz, %s)", sampler_idx, src0_param.param_str);
3475 break;
3476
3477 case 2:
3478 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3479 "vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str);
3480 break;
3481
3482 case 3:
3483 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3484 "vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str);
3485 break;
3486
3487 default:
3488 FIXME("Unexpected mask size %u\n", mask_size);
3489 break;
3490 }
3491}
3492
3493/** Process the WINED3DSIO_TEXDP3 instruction in GLSL:
3494 * Take a 3-component dot product of the TexCoord[dstreg] and src. */
3495static void shader_glsl_texdp3(const struct wined3d_shader_instruction *ins)
3496{
3497 glsl_src_param_t src0_param;
3498 DWORD dstreg = ins->dst[0].reg.idx;
3499 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3500 DWORD dst_mask;
3501 unsigned int mask_size;
3502
3503 dst_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3504 mask_size = shader_glsl_get_write_mask_size(dst_mask);
3505 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3506
3507 if (mask_size > 1) {
3508 shader_addline(ins->ctx->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
3509 } else {
3510 shader_addline(ins->ctx->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
3511 }
3512}
3513
3514/** Process the WINED3DSIO_TEXDEPTH instruction in GLSL:
3515 * Calculate the depth as dst.x / dst.y */
3516static void shader_glsl_texdepth(const struct wined3d_shader_instruction *ins)
3517{
3518 glsl_dst_param_t dst_param;
3519
3520 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3521
3522 /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0.
3523 * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but
3524 * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1
3525 * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result
3526 * >= 1.0 or < 0.0
3527 */
3528 shader_addline(ins->ctx->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n",
3529 dst_param.reg_name, dst_param.reg_name);
3530}
3531
3532/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in GLSL:
3533 * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
3534 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz; (tmp0.x has already been calculated)
3535 * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
3536 */
3537static void shader_glsl_texm3x2depth(const struct wined3d_shader_instruction *ins)
3538{
3539 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3540 DWORD dstreg = ins->dst[0].reg.idx;
3541 glsl_src_param_t src0_param;
3542
3543 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3544
3545 shader_addline(ins->ctx->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str);
3546 shader_addline(ins->ctx->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
3547}
3548
3549/** Process the WINED3DSIO_TEXM3X2PAD instruction in GLSL
3550 * Calculate the 1st of a 2-row matrix multiplication. */
3551static void shader_glsl_texm3x2pad(const struct wined3d_shader_instruction *ins)
3552{
3553 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3554 DWORD reg = ins->dst[0].reg.idx;
3555 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3556 glsl_src_param_t src0_param;
3557
3558 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3559 shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3560}
3561
3562/** Process the WINED3DSIO_TEXM3X3PAD instruction in GLSL
3563 * Calculate the 1st or 2nd row of a 3-row matrix multiplication. */
3564static void shader_glsl_texm3x3pad(const struct wined3d_shader_instruction *ins)
3565{
3566 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3567 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3568 DWORD reg = ins->dst[0].reg.idx;
3569 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3570 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3571 glsl_src_param_t src0_param;
3572
3573 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3574 shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + current_state->current_row, reg, src0_param.param_str);
3575 current_state->texcoord_w[current_state->current_row++] = reg;
3576}
3577
3578static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins)
3579{
3580 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3581 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3582 DWORD reg = ins->dst[0].reg.idx;
3583 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3584 glsl_src_param_t src0_param;
3585 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3586 glsl_sample_function_t sample_function;
3587
3588 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3589 shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3590
3591 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3592
3593 /* Sample the texture using the calculated coordinates */
3594 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xy");
3595}
3596
3597/** Process the WINED3DSIO_TEXM3X3TEX instruction in GLSL
3598 * Perform the 3rd row of a 3x3 matrix multiply, then sample the texture using the calculated coordinates */
3599static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins)
3600{
3601 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3602 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3603 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state;
3604 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3605 glsl_src_param_t src0_param;
3606 DWORD reg = ins->dst[0].reg.idx;
3607 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3608 glsl_sample_function_t sample_function;
3609
3610 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3611 shader_addline(ins->ctx->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3612
3613 /* Dependent read, not valid with conditional NP2 */
3614 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3615
3616 /* Sample the texture using the calculated coordinates */
3617 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3618
3619 current_state->current_row = 0;
3620}
3621
3622/** Process the WINED3DSIO_TEXM3X3 instruction in GLSL
3623 * Perform the 3rd row of a 3x3 matrix multiply */
3624static void shader_glsl_texm3x3(const struct wined3d_shader_instruction *ins)
3625{
3626 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3627 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3628 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state;
3629 glsl_src_param_t src0_param;
3630 char dst_mask[6];
3631 DWORD reg = ins->dst[0].reg.idx;
3632
3633 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3634
3635 shader_glsl_append_dst(ins->ctx->buffer, ins);
3636 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3637 shader_addline(ins->ctx->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);
3638
3639 current_state->current_row = 0;
3640}
3641
3642/* Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL
3643 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3644static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins)
3645{
3646 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3647 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3648 DWORD reg = ins->dst[0].reg.idx;
3649 glsl_src_param_t src0_param;
3650 glsl_src_param_t src1_param;
3651 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3652 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3653 WINED3DSAMPLER_TEXTURE_TYPE stype = ins->ctx->reg_maps->sampler_type[reg];
3654 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3655 glsl_sample_function_t sample_function;
3656
3657 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3658 shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
3659
3660 /* Perform the last matrix multiply operation */
3661 shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3662 /* Reflection calculation */
3663 shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str);
3664
3665 /* Dependent read, not valid with conditional NP2 */
3666 shader_glsl_get_sample_function(gl_info, stype, 0, &sample_function);
3667
3668 /* Sample the texture */
3669 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3670
3671 current_state->current_row = 0;
3672}
3673
3674/* Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL
3675 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3676static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *ins)
3677{
3678 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3679 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3680 DWORD reg = ins->dst[0].reg.idx;
3681 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3682 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3683 glsl_src_param_t src0_param;
3684 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3685 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3686 glsl_sample_function_t sample_function;
3687
3688 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3689
3690 /* Perform the last matrix multiply operation */
3691 shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str);
3692
3693 /* Construct the eye-ray vector from w coordinates */
3694 shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
3695 current_state->texcoord_w[0], current_state->texcoord_w[1], reg);
3696 shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n");
3697
3698 /* Dependent read, not valid with conditional NP2 */
3699 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3700
3701 /* Sample the texture using the calculated coordinates */
3702 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3703
3704 current_state->current_row = 0;
3705}
3706
3707/** Process the WINED3DSIO_TEXBEM instruction in GLSL.
3708 * Apply a fake bump map transform.
3709 * texbem is pshader <= 1.3 only, this saves a few version checks
3710 */
3711static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins)
3712{
3713 /*IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3714 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device; - unused */
3715 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3716 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3717 glsl_sample_function_t sample_function;
3718 glsl_src_param_t coord_param;
3719 WINED3DSAMPLER_TEXTURE_TYPE sampler_type;
3720 DWORD sampler_idx;
3721 DWORD mask;
3722 DWORD flags;
3723 char coord_mask[6];
3724
3725 sampler_idx = ins->dst[0].reg.idx;
3726 flags = (priv->cur_ps_args->tex_transform >> (sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT))
3727 & WINED3D_PSARGS_TEXTRANSFORM_MASK;
3728
3729 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3730 /* Dependent read, not valid with conditional NP2 */
3731 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3732 mask = sample_function.coord_mask;
3733
3734 shader_glsl_write_mask_to_str(mask, coord_mask);
3735
3736 /* with projective textures, texbem only divides the static texture coord, not the displacement,
3737 * so we can't let the GL handle this.
3738 */
3739 if (flags & WINED3D_PSARGS_PROJECTED) {
3740 DWORD div_mask=0;
3741 char coord_div_mask[3];
3742 switch (flags & ~WINED3D_PSARGS_PROJECTED) {
3743 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
3744 case WINED3DTTFF_COUNT2: div_mask = WINED3DSP_WRITEMASK_1; break;
3745 case WINED3DTTFF_COUNT3: div_mask = WINED3DSP_WRITEMASK_2; break;
3746 case WINED3DTTFF_COUNT4:
3747 case WINED3DTTFF_DISABLE: div_mask = WINED3DSP_WRITEMASK_3; break;
3748 }
3749 shader_glsl_write_mask_to_str(div_mask, coord_div_mask);
3750 shader_addline(ins->ctx->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
3751 }
3752
3753 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param);
3754
3755 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3756 "T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx,
3757 coord_param.param_str, coord_mask);
3758
3759 if (ins->handler_idx == WINED3DSIH_TEXBEML)
3760 {
3761 glsl_src_param_t luminance_param;
3762 glsl_dst_param_t dst_param;
3763
3764 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &luminance_param);
3765 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3766
3767 shader_addline(ins->ctx->buffer, "%s%s *= (%s * luminancescale%d + luminanceoffset%d);\n",
3768 dst_param.reg_name, dst_param.mask_str,
3769 luminance_param.param_str, sampler_idx, sampler_idx);
3770 }
3771}
3772
3773static void shader_glsl_bem(const struct wined3d_shader_instruction *ins)
3774{
3775 glsl_src_param_t src0_param, src1_param;
3776 DWORD sampler_idx = ins->dst[0].reg.idx;
3777
3778 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3779 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3780
3781 shader_glsl_append_dst(ins->ctx->buffer, ins);
3782 shader_addline(ins->ctx->buffer, "%s + bumpenvmat%d * %s);\n",
3783 src0_param.param_str, sampler_idx, src1_param.param_str);
3784}
3785
3786/** Process the WINED3DSIO_TEXREG2AR instruction in GLSL
3787 * Sample 2D texture at dst using the alpha & red (wx) components of src as texture coordinates */
3788static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins)
3789{
3790 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3791 glsl_src_param_t src0_param;
3792 DWORD sampler_idx = ins->dst[0].reg.idx;
3793 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3794 glsl_sample_function_t sample_function;
3795
3796 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
3797
3798 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3799 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3800 "%s.wx", src0_param.reg_name);
3801}
3802
3803/** Process the WINED3DSIO_TEXREG2GB instruction in GLSL
3804 * Sample 2D texture at dst using the green & blue (yz) components of src as texture coordinates */
3805static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins)
3806{
3807 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3808 glsl_src_param_t src0_param;
3809 DWORD sampler_idx = ins->dst[0].reg.idx;
3810 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3811 glsl_sample_function_t sample_function;
3812
3813 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
3814
3815 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3816 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3817 "%s.yz", src0_param.reg_name);
3818}
3819
3820/** Process the WINED3DSIO_TEXREG2RGB instruction in GLSL
3821 * Sample texture at dst using the rgb (xyz) components of src as texture coordinates */
3822static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins)
3823{
3824 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3825 glsl_src_param_t src0_param;
3826 DWORD sampler_idx = ins->dst[0].reg.idx;
3827 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3828 glsl_sample_function_t sample_function;
3829
3830 /* Dependent read, not valid with conditional NP2 */
3831 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3832 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param);
3833
3834 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3835 "%s", src0_param.param_str);
3836}
3837
3838/** Process the WINED3DSIO_TEXKILL instruction in GLSL.
3839 * If any of the first 3 components are < 0, discard this pixel */
3840static void shader_glsl_texkill(const struct wined3d_shader_instruction *ins)
3841{
3842 glsl_dst_param_t dst_param;
3843
3844 /* The argument is a destination parameter, and no writemasks are allowed */
3845 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3846 if (ins->ctx->reg_maps->shader_version.major >= 2)
3847 {
3848 /* 2.0 shaders compare all 4 components in texkill */
3849 shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name);
3850 } else {
3851 /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill
3852 * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all
3853 * 4 components are defined, only the first 3 are used
3854 */
3855 shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name);
3856 }
3857}
3858
3859/** Process the WINED3DSIO_DP2ADD instruction in GLSL.
3860 * dst = dot2(src0, src1) + src2 */
3861static void shader_glsl_dp2add(const struct wined3d_shader_instruction *ins)
3862{
3863 glsl_src_param_t src0_param;
3864 glsl_src_param_t src1_param;
3865 glsl_src_param_t src2_param;
3866 DWORD write_mask;
3867 unsigned int mask_size;
3868
3869 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3870 mask_size = shader_glsl_get_write_mask_size(write_mask);
3871
3872 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3873 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3874 shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &src2_param);
3875
3876 if (mask_size > 1) {
3877 shader_addline(ins->ctx->buffer, "vec%d(dot(%s, %s) + %s));\n",
3878 mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
3879 } else {
3880 shader_addline(ins->ctx->buffer, "dot(%s, %s) + %s);\n",
3881 src0_param.param_str, src1_param.param_str, src2_param.param_str);
3882 }
3883}
3884
3885static void shader_glsl_input_pack(IWineD3DPixelShader *iface, struct wined3d_shader_buffer *buffer,
3886 const struct wined3d_shader_signature_element *input_signature, const struct shader_reg_maps *reg_maps,
3887 enum vertexprocessing_mode vertexprocessing)
3888{
3889 unsigned int i;
3890 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
3891 WORD map = reg_maps->input_registers;
3892
3893 for (i = 0; map; map >>= 1, ++i)
3894 {
3895 const char *semantic_name;
3896 UINT semantic_idx;
3897 char reg_mask[6];
3898
3899 /* Unused */
3900 if (!(map & 1)) continue;
3901
3902 semantic_name = input_signature[i].semantic_name;
3903 semantic_idx = input_signature[i].semantic_idx;
3904 shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
3905
3906 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
3907 {
3908 if (semantic_idx < 8 && vertexprocessing == pretransformed)
3909 shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n",
3910 This->input_reg_map[i], reg_mask, semantic_idx, reg_mask);
3911 else
3912 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3913 This->input_reg_map[i], reg_mask, reg_mask);
3914 }
3915 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
3916 {
3917 if (semantic_idx == 0)
3918 shader_addline(buffer, "IN[%u]%s = vec4(gl_Color)%s;\n",
3919 This->input_reg_map[i], reg_mask, reg_mask);
3920 else if (semantic_idx == 1)
3921 shader_addline(buffer, "IN[%u]%s = vec4(gl_SecondaryColor)%s;\n",
3922 This->input_reg_map[i], reg_mask, reg_mask);
3923 else
3924 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3925 This->input_reg_map[i], reg_mask, reg_mask);
3926 }
3927 else
3928 {
3929 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3930 This->input_reg_map[i], reg_mask, reg_mask);
3931 }
3932 }
3933}
3934
3935/*********************************************
3936 * Vertex Shader Specific Code begins here
3937 ********************************************/
3938
3939static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry) {
3940 glsl_program_key_t key;
3941
3942 key.vshader = entry->vshader;
3943 key.pshader = entry->pshader;
3944 key.vs_args = entry->vs_args;
3945 key.ps_args = entry->ps_args;
3946 key.context = entry->context;
3947
3948 if (wine_rb_put(&priv->program_lookup, &key, &entry->program_lookup_entry) == -1)
3949 {
3950 ERR("Failed to insert program entry.\n");
3951 }
3952}
3953
3954static struct glsl_shader_prog_link *get_glsl_program_entry(struct shader_glsl_priv *priv,
3955 IWineD3DVertexShader *vshader, IWineD3DPixelShader *pshader, struct vs_compile_args *vs_args,
3956 struct ps_compile_args *ps_args, const struct wined3d_context *context) {
3957 struct wine_rb_entry *entry;
3958 glsl_program_key_t key;
3959
3960 key.vshader = vshader;
3961 key.pshader = pshader;
3962 key.vs_args = *vs_args;
3963 key.ps_args = *ps_args;
3964 key.context = context;
3965
3966 entry = wine_rb_get(&priv->program_lookup, &key);
3967 return entry ? WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry) : NULL;
3968}
3969
3970/* GL locking is done by the caller */
3971static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const struct wined3d_gl_info *gl_info,
3972 struct glsl_shader_prog_link *entry)
3973{
3974 glsl_program_key_t key;
3975
3976 key.vshader = entry->vshader;
3977 key.pshader = entry->pshader;
3978 key.vs_args = entry->vs_args;
3979 key.ps_args = entry->ps_args;
3980 key.context = entry->context;
3981 wine_rb_remove(&priv->program_lookup, &key);
3982
3983 if (context_get_current() == entry->context)
3984 {
3985 TRACE("deleting program %p\n", (void *)(uintptr_t)entry->programId);
3986 GL_EXTCALL(glDeleteObjectARB(entry->programId));
3987 checkGLcall("glDeleteObjectARB");
3988 }
3989 else
3990 {
3991 WARN("Attempting to delete program %p created in ctx %p from ctx %p\n", (void *)(uintptr_t)entry->programId, entry->context, context_get_current());
3992 }
3993
3994 if (entry->vshader) list_remove(&entry->vshader_entry);
3995 if (entry->pshader) list_remove(&entry->pshader_entry);
3996 HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations);
3997 HeapFree(GetProcessHeap(), 0, entry->puniformF_locations);
3998 HeapFree(GetProcessHeap(), 0, entry);
3999}
4000
4001static void handle_ps3_input(struct wined3d_shader_buffer *buffer, const struct wined3d_gl_info *gl_info, const DWORD *map,
4002 const struct wined3d_shader_signature_element *input_signature, const struct shader_reg_maps *reg_maps_in,
4003 const struct wined3d_shader_signature_element *output_signature, const struct shader_reg_maps *reg_maps_out)
4004{
4005 unsigned int i, j;
4006 const char *semantic_name_in, *semantic_name_out;
4007 UINT semantic_idx_in, semantic_idx_out;
4008 DWORD *set;
4009 DWORD in_idx;
4010 unsigned int in_count = vec4_varyings(3, gl_info);
4011 char reg_mask[6], reg_mask_out[6];
4012 char destination[50];
4013 WORD input_map, output_map;
4014
4015 set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2));
4016
4017 if (!output_signature)
4018 {
4019 /* Save gl_FrontColor & gl_FrontSecondaryColor before overwriting them. */
4020 shader_addline(buffer, "vec4 front_color = gl_FrontColor;\n");
4021 shader_addline(buffer, "vec4 front_secondary_color = gl_FrontSecondaryColor;\n");
4022 }
4023
4024 input_map = reg_maps_in->input_registers;
4025 for (i = 0; input_map; input_map >>= 1, ++i)
4026 {
4027 if (!(input_map & 1)) continue;
4028
4029 in_idx = map[i];
4030 if (in_idx >= (in_count + 2)) {
4031 FIXME("More input varyings declared than supported, expect issues\n");
4032 continue;
4033 }
4034 else if (map[i] == ~0U)
4035 {
4036 /* Declared, but not read register */
4037 continue;
4038 }
4039
4040 if (in_idx == in_count) {
4041 sprintf(destination, "gl_FrontColor");
4042 } else if (in_idx == in_count + 1) {
4043 sprintf(destination, "gl_FrontSecondaryColor");
4044 } else {
4045 sprintf(destination, "IN[%u]", in_idx);
4046 }
4047
4048 semantic_name_in = input_signature[i].semantic_name;
4049 semantic_idx_in = input_signature[i].semantic_idx;
4050 set[map[i]] = input_signature[i].mask;
4051 shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
4052
4053 if (!output_signature)
4054 {
4055 if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_COLOR))
4056 {
4057 if (semantic_idx_in == 0)
4058 shader_addline(buffer, "%s%s = front_color%s;\n",
4059 destination, reg_mask, reg_mask);
4060 else if (semantic_idx_in == 1)
4061 shader_addline(buffer, "%s%s = front_secondary_color%s;\n",
4062 destination, reg_mask, reg_mask);
4063 else
4064 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4065 destination, reg_mask, reg_mask);
4066 }
4067 else if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_TEXCOORD))
4068 {
4069 if (semantic_idx_in < 8)
4070 {
4071 shader_addline(buffer, "%s%s = gl_TexCoord[%u]%s;\n",
4072 destination, reg_mask, semantic_idx_in, reg_mask);
4073 }
4074 else
4075 {
4076 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4077 destination, reg_mask, reg_mask);
4078 }
4079 }
4080 else if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_FOG))
4081 {
4082 shader_addline(buffer, "%s%s = vec4(gl_FogFragCoord, 0.0, 0.0, 0.0)%s;\n",
4083 destination, reg_mask, reg_mask);
4084 }
4085 else
4086 {
4087 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4088 destination, reg_mask, reg_mask);
4089 }
4090 } else {
4091 BOOL found = FALSE;
4092
4093 output_map = reg_maps_out->output_registers;
4094 for (j = 0; output_map; output_map >>= 1, ++j)
4095 {
4096 if (!(output_map & 1)) continue;
4097
4098 semantic_name_out = output_signature[j].semantic_name;
4099 semantic_idx_out = output_signature[j].semantic_idx;
4100 shader_glsl_write_mask_to_str(output_signature[j].mask, reg_mask_out);
4101
4102 if (semantic_idx_in == semantic_idx_out
4103 && !strcmp(semantic_name_in, semantic_name_out))
4104 {
4105 shader_addline(buffer, "%s%s = OUT[%u]%s;\n",
4106 destination, reg_mask, j, reg_mask);
4107 found = TRUE;
4108 }
4109 }
4110 if(!found) {
4111 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4112 destination, reg_mask, reg_mask);
4113 }
4114 }
4115 }
4116
4117 /* This is solely to make the compiler / linker happy and avoid warning about undefined
4118 * varyings. It shouldn't result in any real code executed on the GPU, since all read
4119 * input varyings are assigned above, if the optimizer works properly.
4120 */
4121 for(i = 0; i < in_count + 2; i++) {
4122 if (set[i] && set[i] != WINED3DSP_WRITEMASK_ALL)
4123 {
4124 unsigned int size = 0;
4125 memset(reg_mask, 0, sizeof(reg_mask));
4126 if(!(set[i] & WINED3DSP_WRITEMASK_0)) {
4127 reg_mask[size] = 'x';
4128 size++;
4129 }
4130 if(!(set[i] & WINED3DSP_WRITEMASK_1)) {
4131 reg_mask[size] = 'y';
4132 size++;
4133 }
4134 if(!(set[i] & WINED3DSP_WRITEMASK_2)) {
4135 reg_mask[size] = 'z';
4136 size++;
4137 }
4138 if(!(set[i] & WINED3DSP_WRITEMASK_3)) {
4139 reg_mask[size] = 'w';
4140 size++;
4141 }
4142
4143 if (i == in_count) {
4144 sprintf(destination, "gl_FrontColor");
4145 } else if (i == in_count + 1) {
4146 sprintf(destination, "gl_FrontSecondaryColor");
4147 } else {
4148 sprintf(destination, "IN[%u]", i);
4149 }
4150
4151 if (size == 1) {
4152 shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask);
4153 } else {
4154 shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size);
4155 }
4156 }
4157 }
4158
4159 HeapFree(GetProcessHeap(), 0, set);
4160}
4161
4162static void generate_texcoord_assignment(struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *vs, IWineD3DPixelShaderImpl *ps)
4163{
4164 DWORD map;
4165 unsigned int i;
4166 char reg_mask[6];
4167
4168 if (!ps)
4169 return;
4170
4171 for (i = 0, map = ps->baseShader.reg_maps.texcoord; map && i < min(8, MAX_REG_TEXCRD); map >>= 1, ++i)
4172 {
4173 if (!(map & 1))
4174 continue;
4175
4176 /* so far we assume that if texcoord_mask has any write flags, they are assigned appropriately with pixel shader */
4177 if ((vs->baseShader.reg_maps.texcoord_mask[i]) & WINED3DSP_WRITEMASK_ALL)
4178 continue;
4179
4180 shader_glsl_write_mask_to_str(WINED3DSP_WRITEMASK_ALL, reg_mask);
4181 shader_addline(buffer, "gl_TexCoord[%u]%s = gl_MultiTexCoord%u%s;\n", i, reg_mask, i, reg_mask);
4182 }
4183}
4184
4185/* GL locking is done by the caller */
4186static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer *buffer,
4187 IWineD3DVertexShader *a_vertexshader, IWineD3DPixelShader *pixelshader, const struct wined3d_gl_info *gl_info)
4188{
4189 GLhandleARB ret = 0;
4190 IWineD3DVertexShaderImpl *vs = (IWineD3DVertexShaderImpl *) a_vertexshader;
4191 IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) pixelshader;
4192 IWineD3DDeviceImpl *device;
4193 DWORD vs_major = vs->baseShader.reg_maps.shader_version.major;
4194 DWORD ps_major = ps ? ps->baseShader.reg_maps.shader_version.major : 0;
4195 unsigned int i;
4196 const char *semantic_name;
4197 UINT semantic_idx;
4198 char reg_mask[6];
4199 const struct wined3d_shader_signature_element *output_signature;
4200
4201 shader_buffer_clear(buffer);
4202
4203 shader_addline(buffer, "#version 120\n");
4204
4205 if(vs_major < 3 && ps_major < 3) {
4206 /* That one is easy: The vertex shader writes to the builtin varyings, the pixel shader reads from them.
4207 * Take care about the texcoord .w fixup though if we're using the fixed function fragment pipeline
4208 */
4209 device = (IWineD3DDeviceImpl *) vs->baseShader.device;
4210 if ((gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W)
4211 && ps_major == 0 && vs_major > 0 && !device->frag_pipe->ffp_proj_control)
4212 {
4213 shader_addline(buffer, "void order_ps_input() {\n");
4214 for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
4215 if(vs->baseShader.reg_maps.texcoord_mask[i] != 0 &&
4216 vs->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
4217 shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", i);
4218 }
4219 }
4220 shader_addline(buffer, "}\n");
4221 } else {
4222 shader_addline(buffer, "void order_ps_input() {\n");
4223 generate_texcoord_assignment(buffer, vs, ps);
4224 shader_addline(buffer, "}\n");
4225 }
4226 } else if(ps_major < 3 && vs_major >= 3) {
4227 WORD map = vs->baseShader.reg_maps.output_registers;
4228
4229 /* The vertex shader writes to its own varyings, the pixel shader needs them in the builtin ones */
4230 output_signature = vs->baseShader.output_signature;
4231
4232 shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
4233 for (i = 0; map; map >>= 1, ++i)
4234 {
4235 DWORD write_mask;
4236
4237 if (!(map & 1)) continue;
4238
4239 semantic_name = output_signature[i].semantic_name;
4240 semantic_idx = output_signature[i].semantic_idx;
4241 write_mask = output_signature[i].mask;
4242 shader_glsl_write_mask_to_str(write_mask, reg_mask);
4243
4244 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
4245 {
4246 if (semantic_idx == 0)
4247 shader_addline(buffer, "gl_FrontColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4248 else if (semantic_idx == 1)
4249 shader_addline(buffer, "gl_FrontSecondaryColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4250 }
4251 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
4252 {
4253 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4254 }
4255 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
4256 {
4257 if (semantic_idx < 8)
4258 {
4259 if (!(gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) || ps_major > 0)
4260 write_mask |= WINED3DSP_WRITEMASK_3;
4261
4262 shader_addline(buffer, "gl_TexCoord[%u]%s = OUT[%u]%s;\n",
4263 semantic_idx, reg_mask, i, reg_mask);
4264 if (!(write_mask & WINED3DSP_WRITEMASK_3))
4265 shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", semantic_idx);
4266 }
4267 }
4268 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
4269 {
4270 shader_addline(buffer, "gl_PointSize = OUT[%u].x;\n", i);
4271 }
4272 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG))
4273 {
4274 shader_addline(buffer, "gl_FogFragCoord = OUT[%u].%c;\n", i, reg_mask[1]);
4275 }
4276 }
4277 shader_addline(buffer, "}\n");
4278
4279 } else if(ps_major >= 3 && vs_major >= 3) {
4280 WORD map = vs->baseShader.reg_maps.output_registers;
4281
4282 output_signature = vs->baseShader.output_signature;
4283
4284 /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */
4285 shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(3, gl_info));
4286 shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
4287
4288 /* First, sort out position and point size. Those are not passed to the pixel shader */
4289 for (i = 0; map; map >>= 1, ++i)
4290 {
4291 if (!(map & 1)) continue;
4292
4293 semantic_name = output_signature[i].semantic_name;
4294 shader_glsl_write_mask_to_str(output_signature[i].mask, reg_mask);
4295
4296 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
4297 {
4298 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4299 }
4300 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
4301 {
4302 shader_addline(buffer, "gl_PointSize = OUT[%u].x;\n", i);
4303 }
4304 }
4305
4306 /* Then, fix the pixel shader input */
4307 handle_ps3_input(buffer, gl_info, ps->input_reg_map, ps->baseShader.input_signature,
4308 &ps->baseShader.reg_maps, output_signature, &vs->baseShader.reg_maps);
4309
4310 shader_addline(buffer, "}\n");
4311 } else if(ps_major >= 3 && vs_major < 3) {
4312 shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(3, gl_info));
4313 shader_addline(buffer, "void order_ps_input() {\n");
4314 /* The vertex shader wrote to the builtin varyings. There is no need to figure out position and
4315 * point size, but we depend on the optimizers kindness to find out that the pixel shader doesn't
4316 * read gl_TexCoord and gl_ColorX, otherwise we'll run out of varyings
4317 */
4318 handle_ps3_input(buffer, gl_info, ps->input_reg_map, ps->baseShader.input_signature,
4319 &ps->baseShader.reg_maps, NULL, NULL);
4320 shader_addline(buffer, "}\n");
4321 } else {
4322 ERR("Unexpected vertex and pixel shader version condition: vs: %d, ps: %d\n", vs_major, ps_major);
4323 }
4324
4325 ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4326 checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");
4327 GL_EXTCALL(glShaderSourceARB(ret, 1, (const char**)&buffer->buffer, NULL));
4328 checkGLcall("glShaderSourceARB(ret, 1, &buffer->buffer, NULL)");
4329 GL_EXTCALL(glCompileShaderARB(ret));
4330 checkGLcall("glCompileShaderARB(ret)");
4331 shader_glsl_validate_compile_link(gl_info, ret, FALSE);
4332 return ret;
4333}
4334
#ifdef VBOX_WITH_VMSVGA
/*
 * Compiles a fixed vertex shader that takes gl_Vertex, forces z to 0.0 and
 * w to 1.0, and multiplies the result by gl_ModelViewProjectionMatrix.
 * The caller attaches it when no vertex shader is bound and the stream
 * positions are flagged as already transformed.
 *
 * Returns the handle of the compiled shader object.
 */
static GLhandleARB generate_passthrough_vshader(const struct wined3d_gl_info *gl_info)
{
    static const char *passthrough_vshader[] =
    {
        "#version 120\n"
        "vec4 R0;\n"
        "void main(void)\n"
        "{\n"
        "    R0   = gl_Vertex;\n"
        "    R0.w = 1.0;\n"
        "    R0.z = 0.0;\n"
        "    gl_Position   = gl_ModelViewProjectionMatrix * R0;\n"
        "}\n"
    };
    GLhandleARB shader_id;

    shader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
    checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");

    GL_EXTCALL(glShaderSourceARB(shader_id, 1, passthrough_vshader, NULL));
    checkGLcall("glShaderSourceARB(ret, 1, passthrough_vshader, NULL)");

    GL_EXTCALL(glCompileShaderARB(shader_id));
    checkGLcall("glCompileShaderARB(ret)");

    shader_glsl_validate_compile_link(gl_info, shader_id, FALSE);
    return shader_id;
}

#endif
4364
4365/* GL locking is done by the caller */
4366static void hardcode_local_constants(IWineD3DBaseShaderImpl *shader, const struct wined3d_gl_info *gl_info,
4367 GLhandleARB programId, char prefix)
4368{
4369 const local_constant *lconst;
4370 GLint tmp_loc;
4371 const float *value;
4372 char glsl_name[8];
4373
4374 LIST_FOR_EACH_ENTRY(lconst, &shader->baseShader.constantsF, local_constant, entry) {
4375 value = (const float *)lconst->value;
4376 snprintf(glsl_name, sizeof(glsl_name), "%cLC%u", prefix, lconst->idx);
4377 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4378 GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
4379 }
4380 checkGLcall("Hardcoding local constants");
4381}
4382
4383/* GL locking is done by the caller */
4384#ifdef VBOX_WITH_VMSVGA
4385static GLhandleARB shader_glsl_generate_pshader(const struct wined3d_context *context,
4386#else
4387static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context,
4388#endif
4389 struct wined3d_shader_buffer *buffer, IWineD3DPixelShaderImpl *This,
4390 const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info)
4391{
4392 const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
4393 const struct wined3d_gl_info *gl_info = context->gl_info;
4394 CONST DWORD *function = This->baseShader.function;
4395 struct shader_glsl_ctx_priv priv_ctx;
4396
4397 /* Create the hw GLSL shader object and assign it as the shader->prgId */
4398 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
4399
4400 memset(&priv_ctx, 0, sizeof(priv_ctx));
4401 priv_ctx.cur_ps_args = args;
4402 priv_ctx.cur_np2fixup_info = np2fixup_info;
4403
4404 shader_addline(buffer, "#version 120\n");
4405
4406 if (gl_info->supported[ARB_SHADER_TEXTURE_LOD] && reg_maps->usestexldd)
4407 {
4408 shader_addline(buffer, "#extension GL_ARB_shader_texture_lod : enable\n");
4409 }
4410 if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
4411 {
4412 /* The spec says that it doesn't have to be explicitly enabled, but the nvidia
4413 * drivers write a warning if we don't do so
4414 */
4415 shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
4416 }
4417 if (gl_info->supported[EXT_GPU_SHADER4])
4418 {
4419 shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4420 }
4421
4422 /* Base Declarations */
4423 shader_generate_glsl_declarations(context, buffer, (IWineD3DBaseShader *)This, reg_maps, &priv_ctx);
4424
4425 /* Pack 3.0 inputs */
4426 if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader)
4427 {
4428 shader_glsl_input_pack((IWineD3DPixelShader *) This, buffer,
4429 This->baseShader.input_signature, reg_maps, args->vp_mode);
4430 }
4431
4432 /* Base Shader Body */
4433 shader_generate_main((IWineD3DBaseShader *)This, buffer, reg_maps, function, &priv_ctx);
4434
4435 /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */
4436 if (reg_maps->shader_version.major < 2)
4437 {
4438 /* Some older cards like GeforceFX ones don't support multiple buffers, so also not gl_FragData */
4439 shader_addline(buffer, "gl_FragData[0] = R0;\n");
4440 }
4441
4442 if (args->srgb_correction)
4443 {
4444 shader_addline(buffer, "tmp0.xyz = pow(gl_FragData[0].xyz, vec3(srgb_const0.x));\n");
4445 shader_addline(buffer, "tmp0.xyz = tmp0.xyz * vec3(srgb_const0.y) - vec3(srgb_const0.z);\n");
4446 shader_addline(buffer, "tmp1.xyz = gl_FragData[0].xyz * vec3(srgb_const0.w);\n");
4447 shader_addline(buffer, "bvec3 srgb_compare = lessThan(gl_FragData[0].xyz, vec3(srgb_const1.x));\n");
4448 shader_addline(buffer, "gl_FragData[0].xyz = mix(tmp0.xyz, tmp1.xyz, vec3(srgb_compare));\n");
4449 shader_addline(buffer, "gl_FragData[0] = clamp(gl_FragData[0], 0.0, 1.0);\n");
4450 }
4451 /* Pixel shader < 3.0 do not replace the fog stage.
4452 * This implements linear fog computation and blending.
4453 * TODO: non linear fog
4454 * NOTE: gl_Fog.start and gl_Fog.end don't hold fog start s and end e but
4455 * -1/(e-s) and e/(e-s) respectively.
4456 */
4457 if (reg_maps->shader_version.major < 3)
4458 {
4459 switch(args->fog) {
4460 case FOG_OFF: break;
4461 case FOG_LINEAR:
4462 shader_addline(buffer, "float fogstart = -1.0 / (gl_Fog.end - gl_Fog.start);\n");
4463 shader_addline(buffer, "float fogend = gl_Fog.end * -fogstart;\n");
4464 shader_addline(buffer, "float Fog = clamp(gl_FogFragCoord * fogstart + fogend, 0.0, 1.0);\n");
4465 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4466 break;
4467 case FOG_EXP:
4468 /* Fog = e^(-gl_Fog.density * gl_FogFragCoord) */
4469 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_FogFragCoord);\n");
4470 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
4471 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4472 break;
4473 case FOG_EXP2:
4474 /* Fog = e^(-(gl_Fog.density * gl_FogFragCoord)^2) */
4475 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_Fog.density * gl_FogFragCoord * gl_FogFragCoord);\n");
4476 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
4477 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4478 break;
4479 }
4480 }
4481
4482 shader_addline(buffer, "}\n");
4483
4484 TRACE("Compiling shader object %p\n", (void *)(uintptr_t)shader_obj);
4485 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
4486 GL_EXTCALL(glCompileShaderARB(shader_obj));
4487 shader_glsl_validate_compile_link(gl_info, shader_obj, FALSE);
4488
4489 /* Store the shader object */
4490 return shader_obj;
4491}
4492
4493/* GL locking is done by the caller */
4494#ifdef VBOX_WITH_VMSVGA
4495static GLhandleARB shader_glsl_generate_vshader(const struct wined3d_context *context,
4496#else
4497static GLuint shader_glsl_generate_vshader(const struct wined3d_context *context,
4498#endif
4499 struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *This,
4500 const struct vs_compile_args *args)
4501{
4502 const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
4503 const struct wined3d_gl_info *gl_info = context->gl_info;
4504 CONST DWORD *function = This->baseShader.function;
4505 struct shader_glsl_ctx_priv priv_ctx;
4506
4507 /* Create the hw GLSL shader program and assign it as the shader->prgId */
4508 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4509
4510 shader_addline(buffer, "#version 120\n");
4511
4512 if (gl_info->supported[EXT_GPU_SHADER4])
4513 {
4514 shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4515 }
4516
4517 memset(&priv_ctx, 0, sizeof(priv_ctx));
4518 priv_ctx.cur_vs_args = args;
4519
4520 /* Base Declarations */
4521 shader_generate_glsl_declarations(context, buffer, (IWineD3DBaseShader *)This, reg_maps, &priv_ctx);
4522
4523 /* Base Shader Body */
4524 shader_generate_main((IWineD3DBaseShader*)This, buffer, reg_maps, function, &priv_ctx);
4525
4526 /* Unpack 3.0 outputs */
4527 if (reg_maps->shader_version.major >= 3) shader_addline(buffer, "order_ps_input(OUT);\n");
4528 else shader_addline(buffer, "order_ps_input();\n");
4529
4530 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
4531 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
4532 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
4533 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
4534 */
4535 if(args->fog_src == VS_FOG_Z) {
4536 shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n");
4537 } else if (!reg_maps->fog) {
4538 shader_addline(buffer, "gl_FogFragCoord = 0.0;\n");
4539 }
4540
4541 /* Write the final position.
4542 *
4543 * OpenGL coordinates specify the center of the pixel while d3d coords specify
4544 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
4545 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
4546 * contains 1.0 to allow a mad.
4547 */
4548 shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
4549 shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
4550 if(args->clip_enabled) {
4551 shader_addline(buffer, "gl_ClipVertex = gl_Position;\n");
4552 }
4553
4554 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
4555 *
4556 * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
4557 * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
4558 * which is the same as z = z * 2 - w.
4559 */
4560 shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
4561
4562 shader_addline(buffer, "}\n");
4563
4564 TRACE("Compiling shader object %p\n", (void *)(uintptr_t)shader_obj);
4565 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
4566 GL_EXTCALL(glCompileShaderARB(shader_obj));
4567 shader_glsl_validate_compile_link(gl_info, shader_obj, FALSE);
4568
4569 return shader_obj;
4570}
4571
4572static GLhandleARB find_glsl_pshader(const struct wined3d_context *context,
4573 struct wined3d_shader_buffer *buffer, IWineD3DPixelShaderImpl *shader,
4574 const struct ps_compile_args *args,
4575 UINT *inp2fixup_info
4576 )
4577{
4578 UINT i;
4579 DWORD new_size;
4580 struct glsl_ps_compiled_shader *new_array;
4581 struct glsl_pshader_private *shader_data;
4582 struct ps_np2fixup_info *np2fixup = NULL;
4583 GLhandleARB ret;
4584
4585 if (!shader->baseShader.backend_data)
4586 {
4587 shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4588 if (!shader->baseShader.backend_data)
4589 {
4590 ERR("Failed to allocate backend data.\n");
4591 return 0;
4592 }
4593 }
4594 shader_data = shader->baseShader.backend_data;
4595
4596 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4597 * so a linear search is more performant than a hashmap or a binary search
4598 * (cache coherency etc)
4599 */
4600 for(i = 0; i < shader_data->num_gl_shaders; i++) {
4601 if(shader_data->gl_shaders[i].context==context
4602 && memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)) == 0) {
4603 if(args->np2_fixup) {
4604 *inp2fixup_info = i;
4605 }
4606 return shader_data->gl_shaders[i].prgId;
4607 }
4608 }
4609
4610 TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4611 if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4612 if (shader_data->num_gl_shaders)
4613 {
4614 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4615 new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
4616 new_size * sizeof(*shader_data->gl_shaders));
4617 } else {
4618 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
4619 new_size = 1;
4620 }
4621
4622 if(!new_array) {
4623 ERR("Out of memory\n");
4624 return 0;
4625 }
4626 shader_data->gl_shaders = new_array;
4627 shader_data->shader_array_size = new_size;
4628 }
4629
4630 shader_data->gl_shaders[shader_data->num_gl_shaders].context = context;
4631 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4632
4633 memset(&shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup, 0, sizeof(struct ps_np2fixup_info));
4634 if (args->np2_fixup) np2fixup = &shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup;
4635
4636 pixelshader_update_samplers(&shader->baseShader.reg_maps,
4637 ((IWineD3DDeviceImpl *)shader->baseShader.device)->stateBlock->textures);
4638
4639 shader_buffer_clear(buffer);
4640 ret = shader_glsl_generate_pshader(context, buffer, shader, args, np2fixup);
4641 *inp2fixup_info = shader_data->num_gl_shaders;
4642 shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4643
4644 return ret;
4645}
4646
4647static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
4648 const DWORD use_map) {
4649 if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
4650 if((stored->clip_enabled) != new->clip_enabled) return FALSE;
4651 return stored->fog_src == new->fog_src;
4652}
4653
4654static GLhandleARB find_glsl_vshader(const struct wined3d_context *context,
4655 struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *shader,
4656 const struct vs_compile_args *args)
4657{
4658 UINT i;
4659 DWORD new_size;
4660 struct glsl_vs_compiled_shader *new_array;
4661 DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map;
4662 struct glsl_vshader_private *shader_data;
4663 GLhandleARB ret;
4664
4665 if (!shader->baseShader.backend_data)
4666 {
4667 shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4668 if (!shader->baseShader.backend_data)
4669 {
4670 ERR("Failed to allocate backend data.\n");
4671 return 0;
4672 }
4673 }
4674 shader_data = shader->baseShader.backend_data;
4675
4676 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4677 * so a linear search is more performant than a hashmap or a binary search
4678 * (cache coherency etc)
4679 */
4680 for(i = 0; i < shader_data->num_gl_shaders; i++) {
4681 if(shader_data->gl_shaders[i].context==context
4682 && vs_args_equal(&shader_data->gl_shaders[i].args, args, use_map)) {
4683 return shader_data->gl_shaders[i].prgId;
4684 }
4685 }
4686
4687 TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4688
4689 if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4690 if (shader_data->num_gl_shaders)
4691 {
4692 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4693 new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
4694 new_size * sizeof(*shader_data->gl_shaders));
4695 } else {
4696 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
4697 new_size = 1;
4698 }
4699
4700 if(!new_array) {
4701 ERR("Out of memory\n");
4702 return 0;
4703 }
4704 shader_data->gl_shaders = new_array;
4705 shader_data->shader_array_size = new_size;
4706 }
4707
4708 shader_data->gl_shaders[shader_data->num_gl_shaders].context = context;
4709 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4710
4711 shader_buffer_clear(buffer);
4712 ret = shader_glsl_generate_vshader(context, buffer, shader, args);
4713 shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4714
4715 return ret;
4716}
4717
4718/** Sets the GLSL program ID for the given pixel and vertex shader combination.
4719 * It sets the programId on the current StateBlock (because it should be called
4720 * inside of the DrawPrimitive() part of the render loop).
4721 *
4722 * If a program for the given combination does not exist, create one, and store
4723 * the program in the hash table. If it creates a program, it will link the
4724 * given objects, too.
4725 */
4726
4727/* GL locking is done by the caller */
4728static void set_glsl_shader_program(const struct wined3d_context *context,
4729 IWineD3DDeviceImpl *device, BOOL a_use_ps, BOOL a_use_vs)
4730{
4731 IWineD3DVertexShader *vshader = a_use_vs ? device->stateBlock->vertexShader : NULL;
4732 IWineD3DPixelShader *pshader = a_use_ps ? device->stateBlock->pixelShader : NULL;
4733 const struct wined3d_gl_info *gl_info = context->gl_info;
4734 struct shader_glsl_priv *priv = device->shader_priv;
4735 struct glsl_shader_prog_link *entry = NULL;
4736 GLhandleARB programId = 0;
4737 GLhandleARB reorder_shader_id = 0;
4738 unsigned int i;
4739 char glsl_name[8];
4740 struct ps_compile_args ps_compile_args;
4741 struct vs_compile_args vs_compile_args;
4742
4743#ifdef VBOX
4744 RT_ZERO(ps_compile_args);
4745 RT_ZERO(vs_compile_args);
4746#endif
4747
4748 if (vshader) find_vs_compile_args((IWineD3DVertexShaderImpl *)vshader, device->stateBlock, &vs_compile_args);
4749 if (pshader) find_ps_compile_args((IWineD3DPixelShaderImpl *)pshader, device->stateBlock, &ps_compile_args);
4750
4751 entry = get_glsl_program_entry(priv, vshader, pshader, &vs_compile_args, &ps_compile_args, context);
4752 if (entry) {
4753 priv->glsl_program = entry;
4754 return;
4755 }
4756
4757 /* If we get to this point, then no matching program exists, so we create one */
4758 programId = GL_EXTCALL(glCreateProgramObjectARB());
4759 TRACE("Created new GLSL shader program %p\n", (void *)(uintptr_t)programId);
4760
4761 /* Create the entry */
4762 entry = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct glsl_shader_prog_link));
4763 entry->context = context;
4764 entry->programId = programId;
4765 entry->vshader = vshader;
4766 entry->pshader = pshader;
4767 entry->vs_args = vs_compile_args;
4768 entry->ps_args = ps_compile_args;
4769 entry->constant_version = 0;
4770 WINEFIXUPINFO_INIT(entry);
4771 /* Add the hash table entry */
4772 add_glsl_program_entry(priv, entry);
4773
4774 /* Set the current program */
4775 priv->glsl_program = entry;
4776
4777 /* Attach GLSL vshader */
4778 if (vshader)
4779 {
4780 GLhandleARB vshader_id = find_glsl_vshader(context, &priv->shader_buffer,
4781 (IWineD3DVertexShaderImpl *)vshader, &vs_compile_args);
4782 WORD map = ((IWineD3DBaseShaderImpl *)vshader)->baseShader.reg_maps.input_registers;
4783 char tmp_name[10];
4784
4785 reorder_shader_id = generate_param_reorder_function(&priv->shader_buffer, vshader, pshader, gl_info);
4786 TRACE("Attaching GLSL shader object %p to program %p\n", (void *)(uintptr_t)reorder_shader_id, (void *)(uintptr_t)programId);
4787 GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id));
4788 checkGLcall("glAttachObjectARB");
4789 /* Flag the reorder function for deletion, then it will be freed automatically when the program
4790 * is destroyed
4791 */
4792 GL_EXTCALL(glDeleteObjectARB(reorder_shader_id));
4793
4794 TRACE("Attaching GLSL shader object %p to program %p\n", (void *)(uintptr_t)vshader_id, (void *)(uintptr_t)programId);
4795 GL_EXTCALL(glAttachObjectARB(programId, vshader_id));
4796 checkGLcall("glAttachObjectARB");
4797
4798 /* Bind vertex attributes to a corresponding index number to match
4799 * the same index numbers as ARB_vertex_programs (makes loading
4800 * vertex attributes simpler). With this method, we can use the
4801 * exact same code to load the attributes later for both ARB and
4802 * GLSL shaders.
4803 *
4804 * We have to do this here because we need to know the Program ID
4805 * in order to make the bindings work, and it has to be done prior
4806 * to linking the GLSL program. */
4807 for (i = 0; map; map >>= 1, ++i)
4808 {
4809 if (!(map & 1)) continue;
4810
4811 snprintf(tmp_name, sizeof(tmp_name), "attrib%u", i);
4812 GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
4813 }
4814 checkGLcall("glBindAttribLocationARB");
4815
4816 list_add_head(&((IWineD3DBaseShaderImpl *)vshader)->baseShader.linked_programs, &entry->vshader_entry);
4817 }
4818#ifdef VBOX_WITH_VMSVGA
4819 else
4820 if (device->strided_streams.position_transformed)
4821 {
4822 GLhandleARB passthrough_vshader_id;
4823
4824 passthrough_vshader_id = generate_passthrough_vshader(gl_info);
4825 TRACE("Attaching GLSL shader object %p to program %p\n", (void *)(uintptr_t)passthrough_vshader_id, (void *)(uintptr_t)programId);
4826 GL_EXTCALL(glAttachObjectARB(programId, passthrough_vshader_id));
4827 checkGLcall("glAttachObjectARB");
4828 /* Flag the reorder function for deletion, then it will be freed automatically when the program
4829 * is destroyed
4830 */
4831 GL_EXTCALL(glDeleteObjectARB(passthrough_vshader_id));
4832 }
4833#endif
4834
4835
4836 /* Attach GLSL pshader */
4837 if (pshader)
4838 {
4839 GLhandleARB pshader_id = find_glsl_pshader(context, &priv->shader_buffer,
4840 (IWineD3DPixelShaderImpl *)pshader, &ps_compile_args,
4841 &entry->inp2Fixup_info
4842 );
4843 TRACE("Attaching GLSL shader object %p to program %p\n", (void *)(uintptr_t)pshader_id, (void *)(uintptr_t)programId);
4844 GL_EXTCALL(glAttachObjectARB(programId, pshader_id));
4845 checkGLcall("glAttachObjectARB");
4846
4847 list_add_head(&((IWineD3DBaseShaderImpl *)pshader)->baseShader.linked_programs, &entry->pshader_entry);
4848 }
4849
4850 /* Link the program */
4851 TRACE("Linking GLSL shader program %p\n", (void *)(uintptr_t)programId);
4852 GL_EXTCALL(glLinkProgramARB(programId));
4853 shader_glsl_validate_compile_link(gl_info, programId, TRUE);
4854
4855 entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4856 sizeof(GLhandleARB) * gl_info->limits.glsl_vs_float_constants);
4857 for (i = 0; i < gl_info->limits.glsl_vs_float_constants; ++i)
4858 {
4859 snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i);
4860 entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4861 }
4862 for (i = 0; i < MAX_CONST_I; ++i)
4863 {
4864 snprintf(glsl_name, sizeof(glsl_name), "VI[%i]", i);
4865 entry->vuniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4866 }
4867 entry->puniformF_locations = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4868 sizeof(GLhandleARB) * gl_info->limits.glsl_ps_float_constants);
4869 for (i = 0; i < gl_info->limits.glsl_ps_float_constants; ++i)
4870 {
4871 snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i);
4872 entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4873 }
4874 for (i = 0; i < MAX_CONST_I; ++i)
4875 {
4876 snprintf(glsl_name, sizeof(glsl_name), "PI[%i]", i);
4877 entry->puniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4878 }
4879
4880 if(pshader) {
4881 char name[32];
4882
4883 for(i = 0; i < MAX_TEXTURES; i++) {
4884 sprintf(name, "bumpenvmat%u", i);
4885 entry->bumpenvmat_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4886 sprintf(name, "luminancescale%u", i);
4887 entry->luminancescale_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4888 sprintf(name, "luminanceoffset%u", i);
4889 entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4890 }
4891
4892 if (ps_compile_args.np2_fixup) {
4893 if (WINEFIXUPINFO_ISVALID(entry)) {
4894 entry->np2Fixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "PsamplerNP2Fixup"));
4895 } else {
4896 FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup uniform found.\n");
4897 }
4898 }
4899 }
4900
4901 entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
4902 entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));
4903 checkGLcall("Find glsl program uniform locations");
4904
4905 if (pshader
4906 && ((IWineD3DPixelShaderImpl *)pshader)->baseShader.reg_maps.shader_version.major >= 3
4907 && ((IWineD3DPixelShaderImpl *)pshader)->declared_in_count > vec4_varyings(3, gl_info))
4908 {
4909 TRACE("Shader %p needs vertex color clamping disabled\n", (void *)(uintptr_t)programId);
4910 entry->vertex_color_clamp = GL_FALSE;
4911 } else {
4912 entry->vertex_color_clamp = GL_FIXED_ONLY_ARB;
4913 }
4914
4915 /* Set the shader to allow uniform loading on it */
4916 GL_EXTCALL(glUseProgramObjectARB(programId));
4917 checkGLcall("glUseProgramObjectARB(programId)");
4918
4919#ifdef DEBUG_misha
4920 {
4921 GLint programIdTest = -1;
4922 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
4923 Assert(programIdTest == programId);
4924 }
4925#endif
4926
4927 /* Load the vertex and pixel samplers now. The function that finds the mappings makes sure
4928 * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If
4929 * a pshader with fixed function pipeline is used there are no vertex samplers, and if a
4930 * vertex shader with fixed function pixel processing is used we make sure that the card
4931 * supports enough samplers to allow the max number of vertex samplers with all possible
4932 * fixed function fragment processing setups. So once the program is linked these samplers
4933 * won't change.
4934 */
4935 if (vshader) shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId);
4936 if (pshader) shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId);
4937
4938 /* If the local constants do not have to be loaded with the environment constants,
4939 * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
4940 * later
4941 */
4942 if (pshader && !((IWineD3DBaseShaderImpl *)pshader)->baseShader.load_local_constsF)
4943 {
4944 hardcode_local_constants((IWineD3DBaseShaderImpl *) pshader, gl_info, programId, 'P');
4945 }
4946 if (vshader && !((IWineD3DBaseShaderImpl *)vshader)->baseShader.load_local_constsF)
4947 {
4948 hardcode_local_constants((IWineD3DBaseShaderImpl *) vshader, gl_info, programId, 'V');
4949 }
4950}
4951
4952/* GL locking is done by the caller */
4953static GLhandleARB create_glsl_blt_shader(const struct wined3d_gl_info *gl_info, enum tex_types tex_type)
4954{
4955 GLhandleARB program_id;
4956 GLhandleARB vshader_id, pshader_id;
4957 static const char *blt_vshader[] =
4958 {
4959 "#version 120\n"
4960 "void main(void)\n"
4961 "{\n"
4962 " gl_Position = gl_Vertex;\n"
4963 " gl_FrontColor = vec4(1.0);\n"
4964 " gl_TexCoord[0] = gl_MultiTexCoord0;\n"
4965 "}\n"
4966 };
4967
4968 static const char *blt_pshaders[tex_type_count] =
4969 {
4970 /* tex_1d */
4971 NULL,
4972 /* tex_2d */
4973 "#version 120\n"
4974 "uniform sampler2D sampler;\n"
4975 "void main(void)\n"
4976 "{\n"
4977 " gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
4978 "}\n",
4979 /* tex_3d */
4980 NULL,
4981 /* tex_cube */
4982 "#version 120\n"
4983 "uniform samplerCube sampler;\n"
4984 "void main(void)\n"
4985 "{\n"
4986 " gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
4987 "}\n",
4988 /* tex_rect */
4989 "#version 120\n"
4990 "#extension GL_ARB_texture_rectangle : enable\n"
4991 "uniform sampler2DRect sampler;\n"
4992 "void main(void)\n"
4993 "{\n"
4994 " gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
4995 "}\n",
4996 };
4997
4998 if (!blt_pshaders[tex_type])
4999 {
5000 FIXME("tex_type %#x not supported\n", tex_type);
5001 tex_type = tex_2d;
5002 }
5003
5004 vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
5005 GL_EXTCALL(glShaderSourceARB(vshader_id, 1, blt_vshader, NULL));
5006 GL_EXTCALL(glCompileShaderARB(vshader_id));
5007 shader_glsl_validate_compile_link(gl_info, vshader_id, FALSE);
5008
5009 pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
5010 GL_EXTCALL(glShaderSourceARB(pshader_id, 1, &blt_pshaders[tex_type], NULL));
5011 GL_EXTCALL(glCompileShaderARB(pshader_id));
5012
5013 shader_glsl_validate_compile_link(gl_info, vshader_id, FALSE);
5014
5015 program_id = GL_EXTCALL(glCreateProgramObjectARB());
5016 GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
5017 GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
5018 GL_EXTCALL(glLinkProgramARB(program_id));
5019 shader_glsl_validate_compile_link(gl_info, program_id, TRUE);
5020
5021 /* Once linked we can mark the shaders for deletion. They will be deleted once the program
5022 * is destroyed
5023 */
5024 GL_EXTCALL(glDeleteObjectARB(vshader_id));
5025 GL_EXTCALL(glDeleteObjectARB(pshader_id));
5026 return program_id;
5027}
5028
5029/* GL locking is done by the caller */
5030static void shader_glsl_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS)
5031{
5032 const struct wined3d_gl_info *gl_info = context->gl_info;
5033 IWineD3DDeviceImpl *device = context_get_device(context);
5034 struct shader_glsl_priv *priv = device->shader_priv;
5035 GLhandleARB program_id = 0;
5036 GLenum old_vertex_color_clamp, current_vertex_color_clamp;
5037
5038 old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
5039
5040 if (useVS || usePS) set_glsl_shader_program(context, device, usePS, useVS);
5041 else priv->glsl_program = NULL;
5042
5043 current_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
5044
5045 if (old_vertex_color_clamp != current_vertex_color_clamp)
5046 {
5047 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
5048 {
5049 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp));
5050 checkGLcall("glClampColorARB");
5051 }
5052 else
5053 {
5054 FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
5055 }
5056 }
5057
5058 program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
5059 if (program_id) TRACE("Using GLSL program %p\n", (void *)(uintptr_t)program_id);
5060 GL_EXTCALL(glUseProgramObjectARB(program_id));
5061 checkGLcall("glUseProgramObjectARB");
5062#ifdef DEBUG_misha
5063 {
5064 GLint programIdTest = -1;
5065 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
5066 Assert(programIdTest == program_id);
5067 }
5068#endif
5069
5070 /* In case that NP2 texcoord fixup data is found for the selected program, trigger a reload of the
5071 * constants. This has to be done because it can't be guaranteed that sampler() (from state.c) is
5072 * called between selecting the shader and using it, which results in wrong fixup for some frames. */
5073 if (priv->glsl_program && WINEFIXUPINFO_ISVALID(priv->glsl_program))
5074 {
5075 shader_glsl_load_np2fixup_constants((IWineD3DDevice *)device, usePS, useVS);
5076 }
5077}
5078
5079/* GL locking is done by the caller */
5080static void shader_glsl_select_depth_blt(IWineD3DDevice *iface, enum tex_types tex_type) {
5081 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5082 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5083 struct shader_glsl_priv *priv = This->shader_priv;
5084 GLhandleARB *blt_program = &priv->depth_blt_program[tex_type];
5085
5086 if (!*blt_program) {
5087 GLint loc;
5088 *blt_program = create_glsl_blt_shader(gl_info, tex_type);
5089 loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler"));
5090 GL_EXTCALL(glUseProgramObjectARB(*blt_program));
5091#ifdef DEBUG_misha
5092 {
5093 GLint programIdTest = -1;
5094 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
5095 Assert(programIdTest == *blt_program);
5096 }
5097#endif
5098 GL_EXTCALL(glUniform1iARB(loc, 0));
5099 } else {
5100 GL_EXTCALL(glUseProgramObjectARB(*blt_program));
5101#ifdef DEBUG_misha
5102 {
5103 GLint programIdTest = -1;
5104 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
5105 Assert(programIdTest == *blt_program);
5106 }
5107#endif
5108 }
5109}
5110
5111/* GL locking is done by the caller */
5112static void shader_glsl_deselect_depth_blt(IWineD3DDevice *iface) {
5113 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5114 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5115 struct shader_glsl_priv *priv = This->shader_priv;
5116 GLhandleARB program_id;
5117
5118 program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
5119 if (program_id) TRACE("Using GLSL program %p\n", (void *)(uintptr_t)program_id);
5120
5121 GL_EXTCALL(glUseProgramObjectARB(program_id));
5122 checkGLcall("glUseProgramObjectARB");
5123#ifdef DEBUG_misha
5124 {
5125 GLint programIdTest = -1;
5126 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
5127 Assert(programIdTest == program_id);
5128 }
5129#endif
5130}
5131
5132static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
5133 const struct list *linked_programs;
5134 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *) iface;
5135 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device;
5136 struct shader_glsl_priv *priv = device->shader_priv;
5137 const struct wined3d_gl_info *gl_info;
5138 struct wined3d_context *context;
5139
5140 /* Note: Do not use QueryInterface here to find out which shader type this is because this code
5141 * can be called from IWineD3DBaseShader::Release
5142 */
5143 char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
5144
5145 if(pshader) {
5146 struct glsl_pshader_private *shader_data;
5147 shader_data = This->baseShader.backend_data;
5148 if(!shader_data || shader_data->num_gl_shaders == 0)
5149 {
5150 HeapFree(GetProcessHeap(), 0, shader_data);
5151 This->baseShader.backend_data = NULL;
5152 return;
5153 }
5154
5155 context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD);
5156 gl_info = context->gl_info;
5157
5158 if (priv->glsl_program && (IWineD3DBaseShader *)priv->glsl_program->pshader == iface)
5159 {
5160 ENTER_GL();
5161 shader_glsl_select(context, FALSE, FALSE);
5162 LEAVE_GL();
5163 }
5164 } else {
5165 struct glsl_vshader_private *shader_data;
5166 shader_data = This->baseShader.backend_data;
5167 if(!shader_data || shader_data->num_gl_shaders == 0)
5168 {
5169 HeapFree(GetProcessHeap(), 0, shader_data);
5170 This->baseShader.backend_data = NULL;
5171 return;
5172 }
5173
5174 context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD);
5175 gl_info = context->gl_info;
5176
5177 if (priv->glsl_program && (IWineD3DBaseShader *)priv->glsl_program->vshader == iface)
5178 {
5179 ENTER_GL();
5180 shader_glsl_select(context, FALSE, FALSE);
5181 LEAVE_GL();
5182 }
5183 }
5184
5185 linked_programs = &This->baseShader.linked_programs;
5186
5187 TRACE("Deleting linked programs\n");
5188 if (linked_programs->next) {
5189 struct glsl_shader_prog_link *entry, *entry2;
5190
5191 ENTER_GL();
5192 if(pshader) {
5193 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, pshader_entry) {
5194 delete_glsl_program_entry(priv, gl_info, entry);
5195 }
5196 } else {
5197 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
5198 delete_glsl_program_entry(priv, gl_info, entry);
5199 }
5200 }
5201 LEAVE_GL();
5202 }
5203
5204 if(pshader) {
5205 UINT i;
5206 struct glsl_pshader_private *shader_data = This->baseShader.backend_data;
5207
5208 ENTER_GL();
5209 for(i = 0; i < shader_data->num_gl_shaders; i++) {
5210 if (shader_data->gl_shaders[i].context==context_get_current())
5211 {
5212 TRACE("deleting pshader %p\n", (void *)(uintptr_t)shader_data->gl_shaders[i].prgId);
5213 GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
5214 checkGLcall("glDeleteObjectARB");
5215 }
5216 else
5217 {
5218 WARN("Attempting to delete pshader %p created in ctx %p from ctx %p\n",
5219 (void *)(uintptr_t)shader_data->gl_shaders[i].prgId, shader_data->gl_shaders[i].context, context_get_current());
5220 }
5221 }
5222 LEAVE_GL();
5223 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
5224 }
5225 else
5226 {
5227 UINT i;
5228 struct glsl_vshader_private *shader_data = This->baseShader.backend_data;
5229
5230 ENTER_GL();
5231 for(i = 0; i < shader_data->num_gl_shaders; i++) {
5232 if (shader_data->gl_shaders[i].context==context_get_current())
5233 {
5234 TRACE("deleting vshader %p\n", (void *)(uintptr_t)shader_data->gl_shaders[i].prgId);
5235 GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
5236 checkGLcall("glDeleteObjectARB");
5237 }
5238 else
5239 {
5240 WARN("Attempting to delete vshader %p created in ctx %p from ctx %p\n",
5241 (void *)(uintptr_t)shader_data->gl_shaders[i].prgId, shader_data->gl_shaders[i].context, context_get_current());
5242 }
5243 }
5244 LEAVE_GL();
5245 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
5246 }
5247
5248 HeapFree(GetProcessHeap(), 0, This->baseShader.backend_data);
5249 This->baseShader.backend_data = NULL;
5250
5251 context_release(context);
5252}
5253
5254static int glsl_program_key_compare(const void *key, const struct wine_rb_entry *entry)
5255{
5256 const glsl_program_key_t *k = key;
5257 const struct glsl_shader_prog_link *prog = WINE_RB_ENTRY_VALUE(entry,
5258 const struct glsl_shader_prog_link, program_lookup_entry);
5259 int cmp;
5260
5261 if (k->context > prog->context) return 1;
5262 else if (k->context < prog->context) return -1;
5263
5264 if (k->vshader > prog->vshader) return 1;
5265 else if (k->vshader < prog->vshader) return -1;
5266
5267 if (k->pshader > prog->pshader) return 1;
5268 else if (k->pshader < prog->pshader) return -1;
5269
5270 if (k->vshader && (cmp = memcmp(&k->vs_args, &prog->vs_args, sizeof(prog->vs_args)))) return cmp;
5271 if (k->pshader && (cmp = memcmp(&k->ps_args, &prog->ps_args, sizeof(prog->ps_args)))) return cmp;
5272
5273 return 0;
5274}
5275
5276static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count)
5277{
5278#ifndef VBOX
5279 SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
5280 void *mem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
5281#else
5282 SIZE_T size;
5283 void *mem;
5284
5285 /* Don't trash the heap if the input is bogus. */
5286 if (constant_count == 0)
5287 constant_count = 1;
5288
5289 size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
5290 mem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
5291#endif
5292
5293 if (!mem)
5294 {
5295 ERR("Failed to allocate memory\n");
5296 return FALSE;
5297 }
5298
5299 heap->entries = mem;
5300 heap->entries[1].version = 0;
5301 heap->positions = (unsigned int *)(heap->entries + constant_count + 1);
5302 heap->size = 1;
5303
5304 return TRUE;
5305}
5306
5307static void constant_heap_free(struct constant_heap *heap)
5308{
5309 HeapFree(GetProcessHeap(), 0, heap->entries);
5310}
5311
5312static const struct wine_rb_functions wined3d_glsl_program_rb_functions =
5313{
5314 wined3d_rb_alloc,
5315 wined3d_rb_realloc,
5316 wined3d_rb_free,
5317 glsl_program_key_compare,
5318};
5319
5320static HRESULT shader_glsl_alloc(IWineD3DDevice *iface) {
5321 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5322 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5323 struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv));
5324 SIZE_T stack_size = wined3d_log2i(max(gl_info->limits.glsl_vs_float_constants,
5325 gl_info->limits.glsl_ps_float_constants)) + 1;
5326
5327 if (!shader_buffer_init(&priv->shader_buffer))
5328 {
5329 ERR("Failed to initialize shader buffer.\n");
5330 goto fail;
5331 }
5332
5333 priv->stack = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, stack_size * sizeof(*priv->stack));
5334 if (!priv->stack)
5335 {
5336 ERR("Failed to allocate memory.\n");
5337 goto fail;
5338 }
5339 if (!constant_heap_init(&priv->vconst_heap, gl_info->limits.glsl_vs_float_constants))
5340 {
5341 ERR("Failed to initialize vertex shader constant heap\n");
5342 goto fail;
5343 }
5344 if (!constant_heap_init(&priv->pconst_heap, gl_info->limits.glsl_ps_float_constants))
5345 {
5346 ERR("Failed to initialize pixel shader constant heap\n");
5347 goto fail;
5348 }
5349
5350 if (wine_rb_init(&priv->program_lookup, &wined3d_glsl_program_rb_functions) == -1)
5351 {
5352 ERR("Failed to initialize rbtree.\n");
5353 goto fail;
5354 }
5355
5356 priv->next_constant_version = 1;
5357
5358 This->shader_priv = priv;
5359 return WINED3D_OK;
5360
5361fail:
5362 constant_heap_free(&priv->pconst_heap);
5363 constant_heap_free(&priv->vconst_heap);
5364 HeapFree(GetProcessHeap(), 0, priv->stack);
5365 shader_buffer_free(&priv->shader_buffer);
5366 HeapFree(GetProcessHeap(), 0, priv);
5367 return E_OUTOFMEMORY;
5368}
5369
5370/* Context activation is done by the caller. */
5371static void shader_glsl_free(IWineD3DDevice *iface) {
5372 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5373 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5374 struct shader_glsl_priv *priv = This->shader_priv;
5375 int i;
5376
5377 ENTER_GL();
5378 for (i = 0; i < tex_type_count; ++i)
5379 {
5380 if (priv->depth_blt_program[i])
5381 {
5382 GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program[i]));
5383 }
5384 }
5385 LEAVE_GL();
5386
5387 wine_rb_destroy(&priv->program_lookup, NULL, NULL);
5388 constant_heap_free(&priv->pconst_heap);
5389 constant_heap_free(&priv->vconst_heap);
5390 HeapFree(GetProcessHeap(), 0, priv->stack);
5391 shader_buffer_free(&priv->shader_buffer);
5392
5393 HeapFree(GetProcessHeap(), 0, This->shader_priv);
5394 This->shader_priv = NULL;
5395}
5396
5397static BOOL shader_glsl_dirty_const(IWineD3DDevice *iface) {
5398 /* TODO: GL_EXT_bindable_uniform can be used to share constants across shaders */
5399 return FALSE;
5400}
5401
5402static void shader_glsl_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *pCaps)
5403{
5404 /* Nvidia Geforce6/7 or Ati R4xx/R5xx cards with GLSL support, support VS 3.0 but older Nvidia/Ati
5405 * models with GLSL support only support 2.0. In case of nvidia we can detect VS 2.0 support based
5406 * on the version of NV_vertex_program.
5407 * For Ati cards there's no way using glsl (it abstracts the lowlevel info away) and also not
5408 * using ARB_vertex_program. It is safe to assume that when a card supports pixel shader 2.0 it
5409 * supports vertex shader 2.0 too and the way around. We can detect ps2.0 using the maximum number
5410 * of native instructions, so use that here. For more info see the pixel shader versioning code below.
5411 */
5412 if ((gl_info->supported[NV_VERTEX_PROGRAM2] && !gl_info->supported[NV_VERTEX_PROGRAM3])
5413 || gl_info->limits.arb_ps_instructions <= 512
5414 || gl_info->limits.glsl_vs_float_constants < 256)
5415 pCaps->VertexShaderVersion = WINED3DVS_VERSION(2,0);
5416 else
5417 pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0);
5418 TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff);
5419 pCaps->MaxVertexShaderConst = gl_info->limits.glsl_vs_float_constants;
5420
5421 /* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b.
5422 * In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed
5423 * to distinguish between 2.0 and 3.0 (and 2.0a/2.0b). In case of Nvidia we use their fragment
5424 * program extensions. On other hardware including ATI GL_ARB_fragment_program offers the info
5425 * in max native instructions. Intel and others also offer the info in this extension but they
5426 * don't support GLSL (at least on Windows).
5427 *
5428 * PS2.0 requires at least 96 instructions, 2.0a/2.0b go up to 512. Assume that if the number
5429 * of instructions is 512 or less we have to do with ps2.0 hardware.
5430 * NOTE: ps3.0 hardware requires 512 or more instructions but ati and nvidia offer 'enough' (1024 vs 4096) on their most basic ps3.0 hardware.
5431 */
5432 if ((gl_info->supported[NV_FRAGMENT_PROGRAM] && !gl_info->supported[NV_FRAGMENT_PROGRAM2])
5433 || gl_info->limits.arb_ps_instructions <= 512
5434 || gl_info->limits.glsl_vs_float_constants < 256)
5435 pCaps->PixelShaderVersion = WINED3DPS_VERSION(2,0);
5436 else
5437 pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0);
5438
5439 pCaps->MaxPixelShaderConst = gl_info->limits.glsl_ps_float_constants;
5440
5441 /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
5442 * Direct3D minimum requirement.
5443 *
5444 * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude"
5445 * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here?
5446 *
5447 * The problem is that the refrast clamps temporary results in the shader to
5448 * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here,
5449 * then applications may miss the clamping behavior. On the other hand, if it is smaller,
5450 * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't
5451 * offer a way to query this.
5452 */
5453 pCaps->PixelShader1xMaxValue = 8.0;
5454 TRACE_(d3d_caps)("Hardware pixel shader version %d.%d enabled (GLSL)\n", (pCaps->PixelShaderVersion >> 8) & 0xff, pCaps->PixelShaderVersion & 0xff);
5455
5456 pCaps->VSClipping = TRUE;
5457}
5458
5459static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup)
5460{
5461 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
5462 {
5463 TRACE("Checking support for fixup:\n");
5464 dump_color_fixup_desc(fixup);
5465 }
5466
5467 /* We support everything except YUV conversions. */
5468 if (!is_complex_fixup(fixup))
5469 {
5470 TRACE("[OK]\n");
5471 return TRUE;
5472 }
5473
5474 TRACE("[FAILED]\n");
5475 return FALSE;
5476}
5477
5478static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
5479{
5480 /* WINED3DSIH_ABS */ shader_glsl_map2gl,
5481 /* WINED3DSIH_ADD */ shader_glsl_arith,
5482 /* WINED3DSIH_BEM */ shader_glsl_bem,
5483 /* WINED3DSIH_BREAK */ shader_glsl_break,
5484 /* WINED3DSIH_BREAKC */ shader_glsl_breakc,
5485 /* WINED3DSIH_BREAKP */ NULL,
5486 /* WINED3DSIH_CALL */ shader_glsl_call,
5487 /* WINED3DSIH_CALLNZ */ shader_glsl_callnz,
5488 /* WINED3DSIH_CMP */ shader_glsl_cmp,
5489 /* WINED3DSIH_CND */ shader_glsl_cnd,
5490 /* WINED3DSIH_CRS */ shader_glsl_cross,
5491 /* WINED3DSIH_CUT */ NULL,
5492 /* WINED3DSIH_DCL */ NULL,
5493 /* WINED3DSIH_DEF */ NULL,
5494 /* WINED3DSIH_DEFB */ NULL,
5495 /* WINED3DSIH_DEFI */ NULL,
5496 /* WINED3DSIH_DP2ADD */ shader_glsl_dp2add,
5497 /* WINED3DSIH_DP3 */ shader_glsl_dot,
5498 /* WINED3DSIH_DP4 */ shader_glsl_dot,
5499 /* WINED3DSIH_DST */ shader_glsl_dst,
5500 /* WINED3DSIH_DSX */ shader_glsl_map2gl,
5501 /* WINED3DSIH_DSY */ shader_glsl_map2gl,
5502 /* WINED3DSIH_ELSE */ shader_glsl_else,
5503 /* WINED3DSIH_EMIT */ NULL,
5504 /* WINED3DSIH_ENDIF */ shader_glsl_end,
5505 /* WINED3DSIH_ENDLOOP */ shader_glsl_end,
5506 /* WINED3DSIH_ENDREP */ shader_glsl_end,
5507 /* WINED3DSIH_EXP */ shader_glsl_map2gl,
5508 /* WINED3DSIH_EXPP */ shader_glsl_expp,
5509 /* WINED3DSIH_FRC */ shader_glsl_map2gl,
5510 /* WINED3DSIH_IADD */ NULL,
5511 /* WINED3DSIH_IF */ shader_glsl_if,
5512 /* WINED3DSIH_IFC */ shader_glsl_ifc,
5513 /* WINED3DSIH_IGE */ NULL,
5514 /* WINED3DSIH_LABEL */ shader_glsl_label,
5515 /* WINED3DSIH_LIT */ shader_glsl_lit,
5516 /* WINED3DSIH_LOG */ shader_glsl_log,
5517 /* WINED3DSIH_LOGP */ shader_glsl_log,
5518 /* WINED3DSIH_LOOP */ shader_glsl_loop,
5519 /* WINED3DSIH_LRP */ shader_glsl_lrp,
5520 /* WINED3DSIH_LT */ NULL,
5521 /* WINED3DSIH_M3x2 */ shader_glsl_mnxn,
5522 /* WINED3DSIH_M3x3 */ shader_glsl_mnxn,
5523 /* WINED3DSIH_M3x4 */ shader_glsl_mnxn,
5524 /* WINED3DSIH_M4x3 */ shader_glsl_mnxn,
5525 /* WINED3DSIH_M4x4 */ shader_glsl_mnxn,
5526 /* WINED3DSIH_MAD */ shader_glsl_mad,
5527 /* WINED3DSIH_MAX */ shader_glsl_map2gl,
5528 /* WINED3DSIH_MIN */ shader_glsl_map2gl,
5529 /* WINED3DSIH_MOV */ shader_glsl_mov,
5530 /* WINED3DSIH_MOVA */ shader_glsl_mov,
5531 /* WINED3DSIH_MUL */ shader_glsl_arith,
5532 /* WINED3DSIH_NOP */ NULL,
5533 /* WINED3DSIH_NRM */ shader_glsl_nrm,
5534 /* WINED3DSIH_PHASE */ NULL,
5535 /* WINED3DSIH_POW */ shader_glsl_pow,
5536 /* WINED3DSIH_RCP */ shader_glsl_rcp,
5537 /* WINED3DSIH_REP */ shader_glsl_rep,
5538 /* WINED3DSIH_RET */ shader_glsl_ret,
5539 /* WINED3DSIH_RSQ */ shader_glsl_rsq,
5540#ifdef VBOX_WITH_VMSVGA
5541 /* WINED3DSIH_SETP */ shader_glsl_setp,
5542#else
5543 /* WINED3DSIH_SETP */ NULL,
5544#endif
5545 /* WINED3DSIH_SGE */ shader_glsl_compare,
5546 /* WINED3DSIH_SGN */ shader_glsl_sgn,
5547 /* WINED3DSIH_SINCOS */ shader_glsl_sincos,
5548 /* WINED3DSIH_SLT */ shader_glsl_compare,
5549 /* WINED3DSIH_SUB */ shader_glsl_arith,
5550 /* WINED3DSIH_TEX */ shader_glsl_tex,
5551 /* WINED3DSIH_TEXBEM */ shader_glsl_texbem,
5552 /* WINED3DSIH_TEXBEML */ shader_glsl_texbem,
5553 /* WINED3DSIH_TEXCOORD */ shader_glsl_texcoord,
5554 /* WINED3DSIH_TEXDEPTH */ shader_glsl_texdepth,
5555 /* WINED3DSIH_TEXDP3 */ shader_glsl_texdp3,
5556 /* WINED3DSIH_TEXDP3TEX */ shader_glsl_texdp3tex,
5557 /* WINED3DSIH_TEXKILL */ shader_glsl_texkill,
5558 /* WINED3DSIH_TEXLDD */ shader_glsl_texldd,
5559 /* WINED3DSIH_TEXLDL */ shader_glsl_texldl,
5560 /* WINED3DSIH_TEXM3x2DEPTH */ shader_glsl_texm3x2depth,
5561 /* WINED3DSIH_TEXM3x2PAD */ shader_glsl_texm3x2pad,
5562 /* WINED3DSIH_TEXM3x2TEX */ shader_glsl_texm3x2tex,
5563 /* WINED3DSIH_TEXM3x3 */ shader_glsl_texm3x3,
5564 /* WINED3DSIH_TEXM3x3DIFF */ NULL,
5565 /* WINED3DSIH_TEXM3x3PAD */ shader_glsl_texm3x3pad,
5566 /* WINED3DSIH_TEXM3x3SPEC */ shader_glsl_texm3x3spec,
5567 /* WINED3DSIH_TEXM3x3TEX */ shader_glsl_texm3x3tex,
5568 /* WINED3DSIH_TEXM3x3VSPEC */ shader_glsl_texm3x3vspec,
5569 /* WINED3DSIH_TEXREG2AR */ shader_glsl_texreg2ar,
5570 /* WINED3DSIH_TEXREG2GB */ shader_glsl_texreg2gb,
5571 /* WINED3DSIH_TEXREG2RGB */ shader_glsl_texreg2rgb,
5572};
5573
5574static void shader_glsl_handle_instruction(const struct wined3d_shader_instruction *ins) {
5575 SHADER_HANDLER hw_fct;
5576
5577 /* Select handler */
5578 hw_fct = shader_glsl_instruction_handler_table[ins->handler_idx];
5579
5580 /* Unhandled opcode */
5581 if (!hw_fct)
5582 {
5583 FIXME("Backend can't handle opcode %#x\n", ins->handler_idx);
5584 return;
5585 }
5586 hw_fct(ins);
5587
5588 shader_glsl_add_instruction_modifiers(ins);
5589}
5590
5591const shader_backend_t glsl_shader_backend = {
5592 shader_glsl_handle_instruction,
5593 shader_glsl_select,
5594 shader_glsl_select_depth_blt,
5595 shader_glsl_deselect_depth_blt,
5596 shader_glsl_update_float_vertex_constants,
5597 shader_glsl_update_float_pixel_constants,
5598 shader_glsl_load_constants,
5599 shader_glsl_load_np2fixup_constants,
5600 shader_glsl_destroy,
5601 shader_glsl_alloc,
5602 shader_glsl_free,
5603 shader_glsl_dirty_const,
5604 shader_glsl_get_caps,
5605 shader_glsl_color_fixup_supported,
5606};
5607
#if defined(VBOXWINEDBG_SHADERS) || defined(VBOX_WINE_WITH_PROFILE)
/**
 * Format a printf-style message into a local buffer and pass it to
 * OutputDebugStringA().
 *
 * Output longer than the buffer is truncated.
 *
 * @param szString  printf-style format string.
 * @param ...       Arguments referenced by the format string.
 */
void vboxWDbgPrintF(char * szString, ...)
{
    char szBuffer[4096*2] = {0};
    va_list pArgList;

    va_start(pArgList, szString);
    /* _vsnprintf() writes no terminator when the output fills the whole
     * count, so keep one byte back; the zero-initialised last element then
     * always terminates the string handed to OutputDebugStringA(). */
    _vsnprintf(szBuffer, sizeof(szBuffer) / sizeof(szBuffer[0]) - 1, szString, pArgList);
    va_end(pArgList);

    OutputDebugStringA(szBuffer);
}
#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette