From 8a283ea518158c2accc818dd263dc3fe65fb2b2b Mon Sep 17 00:00:00 2001 From: kaetemi Date: Mon, 4 Aug 2014 06:11:47 +0200 Subject: [PATCH] 3D: Use higher quality FXAA version --HG-- branch : multipass-stereo --- code/nel/src/3d/fxaa.cpp | 11 +- code/nel/src/3d/fxaa_program.h | 617 ++++++++++++++++++++- code/nel/src/3d/shaders/compile.bat | 2 +- code/nel/src/3d/shaders/fxaa3_11.h | 28 +- code/nel/src/3d/shaders/fxaa_pp.cg | 23 +- code/nel/src/3d/shaders/fxaa_pp_arbfp1.txt | 340 ++++++++++-- code/nel/src/3d/shaders/fxaa_pp_ps_2_0.txt | 376 ++++++++++--- 7 files changed, 1237 insertions(+), 160 deletions(-) diff --git a/code/nel/src/3d/fxaa.cpp b/code/nel/src/3d/fxaa.cpp index c1eb57607..a1f07aaa1 100644 --- a/code/nel/src/3d/fxaa.cpp +++ b/code/nel/src/3d/fxaa.cpp @@ -200,7 +200,6 @@ void CFXAA::applyEffect() float fwidth = (float)width; float fheight = (float)height; - nldebug("%f, %f", fwidth, fheight); float pwidth = 1.0f / fwidth; float pheight = 1.0f / fheight; float hpwidth = pwidth * 0.5f; @@ -245,10 +244,14 @@ void CFXAA::applyEffect() nlassert(vpok); bool ppok = drv->activePixelProgram(m_PP); nlassert(ppok); - drv->setUniform4f(IDriver::PixelProgram, 0, -n / fwidth, -n / fheight, n / fwidth, n / fheight); // fxaaConsoleRcpFrameOpt - drv->setUniform4f(IDriver::PixelProgram, 1, -2.0f / fwidth, -2.0f / fheight, 2.0f / fwidth, 2.0f / fheight); // fxaaConsoleRcpFrameOpt2 + /*drv->setUniform4f(IDriver::PixelProgram, 0, -n / fwidth, -n / fheight, n / fwidth, n / fheight); // fxaaConsoleRcpFrameOpt + drv->setUniform4f(IDriver::PixelProgram, 1, -2.0f / fwidth, -2.0f / fheight, 2.0f / fwidth, 2.0f / fheight); // fxaaConsoleRcpFrameOpt2*/ + drv->setUniform2f(IDriver::PixelProgram, 0, 1.0f / fwidth, 1.0f / fheight); // fxaaQualityRcpFrame + drv->setUniform1f(IDriver::PixelProgram, 1, 0.75f); // fxaaQualitySubpix + drv->setUniform1f(IDriver::PixelProgram, 2, 0.166f); // fxaaQualityEdgeThreshold + drv->setUniform1f(IDriver::PixelProgram, 3, 0.0833f); // 
fxaaQualityEdgeThresholdMin drv->setUniformMatrix(IDriver::VertexProgram, 0, IDriver::ModelViewProjection, IDriver::Identity); - drv->setUniform4f(IDriver::VertexProgram, 9, -hpwidth, -hpheight, hpwidth, hpheight); + // drv->setUniform4f(IDriver::VertexProgram, 9, -hpwidth, -hpheight, hpwidth, hpheight); // render effect m_Mat.getObjectPtr()->setTexture(0, otherRenderTarget->getITexture()); diff --git a/code/nel/src/3d/fxaa_program.h b/code/nel/src/3d/fxaa_program.h index 6002a09a4..a6bf847ef 100644 --- a/code/nel/src/3d/fxaa_program.h +++ b/code/nel/src/3d/fxaa_program.h @@ -6,7 +6,7 @@ const char *a_nelvp = "DP4 o[HPOS].z, c[2], v[OPOS];\n" "DP4 o[HPOS].w, c[3], v[OPOS];\n" "MOV o[TEX0].xy, v[TEX0];\n" - "ADD o[TEX1], v[TEX0].xyxy, c[9];\n" + // "ADD o[TEX1], v[TEX0].xyxy, c[9];\n" "END\n"; const char *a_arbfp1_test = @@ -15,7 +15,310 @@ const char *a_arbfp1_test = "TEX result.color, fragment.texcoord[1].zwzw, texture[0], 2D;\n" "END\n"; -const char *a_arbfp1 = +const char *a_arbfp1 = + "!!ARBfp1.0\n" + "OPTION ARB_precision_hint_fastest;\n" + /*"# cgc version 3.1.0013, build date Apr 18 2012\n" + "# command line args: -profile arbfp1 -O3 -fastmath -fastprecision\n" + "# source file: fxaa_pp.cg\n" + "#vendor NVIDIA Corporation\n" + "#version 3.1.0.13\n" + "#profile arbfp1\n" + "#program fxaa_pp\n" + "#semantic fxaa_pp.fxaaQualityRcpFrame\n" + "#semantic fxaa_pp.fxaaQualitySubpix\n" + "#semantic fxaa_pp.fxaaQualityEdgeThreshold\n" + "#semantic fxaa_pp.fxaaQualityEdgeThresholdMin\n" + "#semantic fxaa_pp.nlTex0 : TEX0\n" + "#var float2 pos : $vin.TEXCOORD0 : TEX0 : 0 : 1\n" + "#var float2 fxaaQualityRcpFrame : : c[0] : 2 : 1\n" + "#var float fxaaQualitySubpix : : c[1] : 3 : 1\n" + "#var float fxaaQualityEdgeThreshold : : c[2] : 4 : 1\n" + "#var float fxaaQualityEdgeThresholdMin : : c[3] : 5 : 1\n" + "#var sampler2D nlTex0 : TEX0 : texunit 0 : 6 : 1\n" + "#var float4 oCol : $vout.COLOR : COL : 7 : 1\n" + "#const c[4] = 0 -1 1 -2\n" + "#const c[5] = 2 0.5 0.25 1.5\n" 
+ "#const c[6] = 4 12 0.083333336 3\n"*/ + "PARAM c[7] = { program.env[0..3],\n" + " { 0, -1, 1, -2 },\n" + " { 2, 0.5, 0.25, 1.5 },\n" + " { 4, 12, 0.083333336, 3 } };\n" + "TEMP R0;\n" + "TEMP R1;\n" + "TEMP R2;\n" + "TEMP R3;\n" + "TEMP R4;\n" + "TEMP R5;\n" + "TEMP R6;\n" + "TEMP R7;\n" + "TEMP R8;\n" + "TEMP R9;\n" + "MOV R3.xyz, c[4];\n" + "MAD R2.zw, R3.xyyz, c[0].xyxy, fragment.texcoord[0].xyxy;\n" + "MAD R0.xy, R3, c[0], fragment.texcoord[0];\n" + "MAD R1.xy, R3.zyzw, c[0], fragment.texcoord[0];\n" + "TEX R5.y, R1, texture[0], 2D;\n" + "MAD R1.xy, R3.zxzw, c[0], fragment.texcoord[0];\n" + "ADD R0.zw, fragment.texcoord[0].xyxy, -c[0].xyxy;\n" + "TEX R4.y, R0.zwzw, texture[0], 2D;\n" + "TEX R6.y, R2.zwzw, texture[0], 2D;\n" + "TEX R8, fragment.texcoord[0], texture[0], 2D;\n" + "TEX R1.y, R1, texture[0], 2D;\n" + "TEX R0.y, R0, texture[0], 2D;\n" + "ADD R0.z, R4.y, R5.y;\n" + "MAD R1.z, R0.y, c[4].w, R0;\n" + "MAD R0.zw, R3.xyyx, c[0].xyxy, fragment.texcoord[0].xyxy;\n" + "TEX R2.y, R0.zwzw, texture[0], 2D;\n" + "ADD R0.x, R2.y, R1.y;\n" + "ABS R0.w, R1.z;\n" + "ADD R1.zw, fragment.texcoord[0].xyxy, c[0].xyxy;\n" + "TEX R7.y, R1.zwzw, texture[0], 2D;\n" + "MAD R0.z, R8.y, c[4].w, R0.x;\n" + "ABS R0.z, R0;\n" + "MAD R2.x, R0.z, c[5], R0.w;\n" + "MAD R0.zw, R3.xyxz, c[0].xyxy, fragment.texcoord[0].xyxy;\n" + "TEX R3.y, R0.zwzw, texture[0], 2D;\n" + "ADD R0.z, R0.y, R3.y;\n" + "ADD R1.x, R6.y, R7.y;\n" + "MAD R0.w, R3.y, c[4], R1.x;\n" + "MAD R1.x, R8.y, c[4].w, R0.z;\n" + "ABS R0.w, R0;\n" + "ADD R2.x, R0.w, R2;\n" + "ADD R2.w, R4.y, R6.y;\n" + "ADD R0.w, R5.y, R7.y;\n" + "ABS R1.z, R1.x;\n" + "MAD R1.x, R1.y, c[4].w, R0.w;\n" + "ABS R1.w, R1.x;\n" + "MAD R1.x, R2.y, c[4].w, R2.w;\n" + "MAD R1.z, R1, c[5].x, R1.w;\n" + "ABS R1.x, R1;\n" + "ADD R1.x, R1, R1.z;\n" + "SGE R4.x, R1, R2;\n" + "MAX R1.x, R3.y, R8.y;\n" + "MAX R1.z, R1.y, R1.x;\n" + "MAX R1.x, R0.y, R2.y;\n" + "MAX R1.x, R1, R1.z;\n" + "MIN R1.z, R3.y, R8.y;\n" + "MIN R1.w, R1.y, R1.z;\n" + "MIN R1.z, 
R0.y, R2.y;\n" + "MIN R1.z, R1, R1.w;\n" + "MUL R2.x, R1, c[2];\n" + "ADD R3.z, R1.x, -R1;\n" + "ABS R3.w, R4.x;\n" + "MAX R1.w, R2.x, c[3].x;\n" + "ADD R2.z, R3, -R1.w;\n" + "CMP R2.x, R2.z, c[4], c[4].z;\n" + "CMP R1.x, -R3.w, c[4], c[4].z;\n" + "MUL R3.w, R2.x, R1.x;\n" + "CMP R1.z, -R3.w, R1.y, R3.y;\n" + "ADD R1.y, -R8, R1.z;\n" + "CMP R1.w, -R3, R2.y, R0.y;\n" + "ADD R0.y, -R8, R1.w;\n" + "MUL R4.x, R2, R4;\n" + "CMP R3.y, -R3.w, c[0], R3.x;\n" + "ABS R4.w, R1.y;\n" + "ABS R4.z, R0.y;\n" + "SGE R0.y, R4.z, R4.w;\n" + "MUL R1.y, R2.x, R0;\n" + "ABS R0.y, R0;\n" + "CMP R4.y, -R0, c[4].x, c[4].z;\n" + "ABS R0.y, R1.x;\n" + "CMP R0.y, -R0, c[4].x, c[4].z;\n" + "MUL R1.x, R2, R0.y;\n" + "CMP R2.y, -R4.x, c[0], c[0].x;\n" + "CMP R2.y, -R1, -R2, R2;\n" + "MAD R1.y, R2, c[5], fragment.texcoord[0];\n" + "CMP R5.z, -R4.x, R1.y, fragment.texcoord[0].y;\n" + "ADD R5.y, R5.z, -R3;\n" + "MAD R0.y, R2, c[5], fragment.texcoord[0].x;\n" + "CMP R3.x, -R1, c[0], R3;\n" + "CMP R6.x, -R3.w, R0.y, fragment.texcoord[0];\n" + "ADD R5.w, R5.z, R3.y;\n" + "ADD R1.x, R6, -R3;\n" + "MOV R1.y, R5;\n" + "TEX R0.y, R1, texture[0], 2D;\n" + "MUL R1.y, R2.x, R4;\n" + "ADD R0.x, R0.z, R0;\n" + "ADD R0.w, R2, R0;\n" + "MAD R0.z, R0.x, c[5].x, R0.w;\n" + "ADD R1.w, R8.y, R1;\n" + "ADD R1.z, R8.y, R1;\n" + "CMP R4.y, -R1, R1.z, R1.w;\n" + "ADD R1.z, R6.x, R3.x;\n" + "MAD R5.x, -R4.y, c[5].y, R0.y;\n" + "MOV R1.w, R5;\n" + "TEX R0.y, R1.zwzw, texture[0], 2D;\n" + "MAX R1.w, R4.z, R4;\n" + "MAD R1.y, -R4, c[5], R0;\n" + "MUL R4.z, R1.w, c[5];\n" + "ABS R0.y, R1;\n" + "SGE R1.w, R0.y, R4.z;\n" + "ABS R6.y, R5.x;\n" + "SGE R0.y, R6, R4.z;\n" + "ABS R1.w, R1;\n" + "CMP R6.y, -R1.w, c[4].x, c[4].z;\n" + "ABS R0.y, R0;\n" + "CMP R5.z, -R0.y, c[4].x, c[4];\n" + "ADD_SAT R0.y, R5.z, R6;\n" + "MUL R4.w, R2.x, R0.y;\n" + "MUL R0.y, R2.x, R6;\n" + "MAD R1.w, R3.y, c[5], R5;\n" + "CMP R6.x, -R0.y, R1.w, R5.w;\n" + "MAD R6.z, R3.x, c[5].w, R1;\n" + "CMP R1.z, -R0.y, R6, R1;\n" + "MOV R1.w, R6.x;\n" + "TEX 
R0.y, R1.zwzw, texture[0], 2D;\n" + "MUL R1.w, R4, R6.y;\n" + "CMP R6.y, -R1.w, R0, R1;\n" + "MUL R0.y, R2.x, R5.z;\n" + "MAD R1.y, -R3, c[5].w, R5;\n" + "CMP R5.w, -R0.y, R1.y, R5.y;\n" + "MAD R6.z, -R3.x, c[5].w, R1.x;\n" + "CMP R1.x, -R0.y, R6.z, R1;\n" + "MOV R1.y, R5.w;\n" + "TEX R0.y, R1, texture[0], 2D;\n" + "MUL R5.y, R4.w, R5.z;\n" + "CMP R0.y, -R5, R0, R5.x;\n" + "MAD R5.x, -R4.y, c[5].y, R0.y;\n" + "CMP R5.z, -R5.y, R5.x, R0.y;\n" + "MAD R1.y, -R4, c[5], R6;\n" + "CMP R1.y, -R1.w, R1, R6;\n" + "ABS R1.w, R1.y;\n" + "SGE R1.w, R1, R4.z;\n" + "ABS R0.y, R5.z;\n" + "SGE R0.y, R0, R4.z;\n" + "ABS R1.w, R1;\n" + "CMP R6.y, -R1.w, c[4].x, c[4].z;\n" + "ABS R0.y, R0;\n" + "CMP R5.y, -R0, c[4].x, c[4].z;\n" + "ADD_SAT R0.y, R5, R6;\n" + "MUL R5.x, R4.w, R0.y;\n" + "MUL R0.y, R4.w, R6;\n" + "MAD R1.w, R3.y, c[5].x, R6.x;\n" + "CMP R6.x, -R0.y, R1.w, R6;\n" + "MAD R6.z, R3.x, c[5].x, R1;\n" + "CMP R1.z, -R0.y, R6, R1;\n" + "MOV R1.w, R6.x;\n" + "TEX R0.y, R1.zwzw, texture[0], 2D;\n" + "MUL R1.w, R5.x, R6.y;\n" + "CMP R6.y, -R1.w, R0, R1;\n" + "MUL R0.y, R4.w, R5;\n" + "MAD R1.y, -R3, c[5].x, R5.w;\n" + "CMP R4.w, -R0.y, R1.y, R5;\n" + "MAD R6.z, -R3.x, c[5].x, R1.x;\n" + "CMP R1.x, -R0.y, R6.z, R1;\n" + "MOV R1.y, R4.w;\n" + "TEX R0.y, R1, texture[0], 2D;\n" + "MUL R5.y, R5.x, R5;\n" + "CMP R0.y, -R5, R0, R5.z;\n" + "MAD R5.z, -R4.y, c[5].y, R0.y;\n" + "CMP R5.w, -R5.y, R5.z, R0.y;\n" + "MAD R1.y, -R4, c[5], R6;\n" + "CMP R1.y, -R1.w, R1, R6;\n" + "ABS R1.w, R1.y;\n" + "SGE R1.w, R1, R4.z;\n" + "ABS R1.w, R1;\n" + "CMP R6.y, -R1.w, c[4].x, c[4].z;\n" + "ABS R0.y, R5.w;\n" + "SGE R0.y, R0, R4.z;\n" + "ABS R0.y, R0;\n" + "CMP R5.y, -R0, c[4].x, c[4].z;\n" + "ADD_SAT R0.y, R5, R6;\n" + "MUL R5.z, R5.x, R0.y;\n" + "MUL R0.y, R5.x, R6;\n" + "MAD R1.w, R3.y, c[6].x, R6.x;\n" + "CMP R6.x, -R0.y, R1.w, R6;\n" + "MAD R6.z, R3.x, c[6].x, R1;\n" + "CMP R1.z, -R0.y, R6, R1;\n" + "MOV R1.w, R6.x;\n" + "TEX R0.y, R1.zwzw, texture[0], 2D;\n" + "MUL R1.w, R5.z, R6.y;\n" + "CMP 
R6.y, -R1.w, R0, R1;\n" + "MUL R0.y, R5.x, R5;\n" + "MAD R1.y, -R3, c[6].x, R4.w;\n" + "CMP R4.w, -R0.y, R1.y, R4;\n" + "MAD R5.x, -R3, c[6], R1;\n" + "CMP R1.x, -R0.y, R5, R1;\n" + "MOV R1.y, R4.w;\n" + "TEX R0.y, R1, texture[0], 2D;\n" + "MUL R1.y, R5.z, R5;\n" + "CMP R5.x, -R1.y, R0.y, R5.w;\n" + "MAD R5.y, -R4, c[5], R5.x;\n" + "CMP R1.y, -R1, R5, R5.x;\n" + "MAD R0.y, -R4, c[5], R6;\n" + "CMP R0.y, -R1.w, R0, R6;\n" + "ABS R5.x, R0.y;\n" + "ABS R1.w, R1.y;\n" + "SGE R1.w, R1, R4.z;\n" + "SGE R5.x, R5, R4.z;\n" + "ABS R4.z, R5.x;\n" + "ABS R1.w, R1;\n" + "CMP R4.z, -R4, c[4].x, c[4];\n" + "CMP R1.w, -R1, c[4].x, c[4].z;\n" + "MUL R4.z, R5, R4;\n" + "MAD R5.y, R3.x, c[6], R1.z;\n" + "CMP R5.y, -R4.z, R5, R1.z;\n" + "MAD R5.x, R3.y, c[6].y, R6;\n" + "CMP R1.z, -R4, R5.x, R6.x;\n" + "MUL R1.w, R5.z, R1;\n" + "ADD R4.z, -fragment.texcoord[0].x, R5.y;\n" + "ADD R1.z, -fragment.texcoord[0].y, R1;\n" + "CMP R1.z, -R3.w, R1, R4;\n" + "MAD R4.z, -R3.x, c[6].y, R1.x;\n" + "MAD R3.x, -R3.y, c[6].y, R4.w;\n" + "CMP R3.y, -R1.w, R4.z, R1.x;\n" + "CMP R1.x, -R1.w, R3, R4.w;\n" + "ADD R1.w, fragment.texcoord[0].x, -R3.y;\n" + "ADD R1.x, fragment.texcoord[0].y, -R1;\n" + "CMP R1.x, -R3.w, R1, R1.w;\n" + "SLT R1.w, R1.x, R1.z;\n" + "ADD R3.x, R1, R1.z;\n" + "ABS R1.w, R1;\n" + "MIN R1.x, R1, R1.z;\n" + "CMP R1.w, -R1, c[4].x, c[4].z;\n" + "MUL R1.z, R2.x, R1.w;\n" + "RCP R3.x, R3.x;\n" + "MAD R1.x, R1, -R3, c[5].y;\n" + "MUL R1.w, R4.y, c[5].y;\n" + "SLT R3.x, R1.y, c[4];\n" + "SLT R1.y, R8, R1.w;\n" + "SLT R0.y, R0, c[4].x;\n" + "ADD R0.y, R0, -R1;\n" + "ADD R1.y, -R1, R3.x;\n" + "ABS R0.y, R0;\n" + "ABS R1.y, R1;\n" + "CMP R0.y, -R0, c[4].z, c[4].x;\n" + "CMP R1.y, -R1, c[4].z, c[4].x;\n" + "CMP R0.x, -R1.z, R0.y, R1.y;\n" + "MAD R0.y, R0.z, c[6].z, -R8;\n" + "ABS R0.x, R0;\n" + "CMP R0.x, -R0, c[4], c[4].z;\n" + "MUL R0.x, R2, R0;\n" + "CMP R0.x, -R0, c[4], R1;\n" + "RCP R0.z, R3.z;\n" + "ABS R0.y, R0;\n" + "MUL_SAT R0.y, R0, R0.z;\n" + "MUL R0.z, R0.y, c[4].w;\n" + "ADD 
R0.z, R0, c[6].w;\n" + "MUL R0.y, R0, R0;\n" + "MUL R0.y, R0.z, R0;\n" + "MUL R0.y, R0, R0;\n" + "MUL R0.y, R0, c[1].x;\n" + "MAX R0.x, R0, R0.y;\n" + "MAD R0.y, R0.x, R2, fragment.texcoord[0];\n" + "MAD R0.z, R0.x, R2.y, fragment.texcoord[0].x;\n" + "CMP R0.x, -R3.w, R0.z, fragment.texcoord[0];\n" + "CMP R0.y, -R4.x, R0, fragment.texcoord[0];\n" + "TEX R0.xyz, R0, texture[0], 2D;\n" + "CMP R1, R2.z, R8, R9;\n" + "MOV R0.w, R8.y;\n" + "CMP result.color, -R2.x, R0, R1;\n" + "END\n"; + /*"# 260 instructions, 10 R-regs\n" + "\n"*/ + +const char *a_arbfp1_ps3 = "!!ARBfp1.0\n" "OPTION ARB_precision_hint_fastest;\n" //# cgc version 3.1.0013, build date Apr 18 2012 @@ -203,7 +506,315 @@ const char *a_ps_2_0_test_avg = "mul r0, r0, c0.x\n" "mov oC0, r0\n"; -const char *a_ps_2_0 = +const char *a_ps_2_0 = + "ps_2_x\n" + /*"// cgc version 3.1.0013, build date Apr 18 2012\n" + "// command line args: -profile ps_2_x -O3 -fastmath -fastprecision\n" + "// source file: fxaa_pp.cg\n" + "//vendor NVIDIA Corporation\n" + "//version 3.1.0.13\n" + "//profile ps_2_x\n" + "//program fxaa_pp\n" + "//semantic fxaa_pp.fxaaQualityRcpFrame\n" + "//semantic fxaa_pp.fxaaQualitySubpix\n" + "//semantic fxaa_pp.fxaaQualityEdgeThreshold\n" + "//semantic fxaa_pp.fxaaQualityEdgeThresholdMin\n" + "//semantic fxaa_pp.nlTex0 : TEX0\n" + "//var float2 pos : $vin.TEXCOORD0 : TEX0 : 0 : 1\n" + "//var float2 fxaaQualityRcpFrame : : c[0] : 2 : 1\n" + "//var float fxaaQualitySubpix : : c[1] : 3 : 1\n" + "//var float fxaaQualityEdgeThreshold : : c[2] : 4 : 1\n" + "//var float fxaaQualityEdgeThresholdMin : : c[3] : 5 : 1\n" + "//var sampler2D nlTex0 : TEX0 : texunit 0 : 6 : 1\n" + "//var float4 oCol : $vout.COLOR : COL : 7 : 1\n" + "//const c[4] = 0 -1 1 -2\n" + "//const c[5] = 2 0.5 0.25 1.5\n" + "//const c[6] = 4 12 0.083333336\n" + "//const c[7] = -2 3\n"*/ + "dcl_2d s0\n" + "def c4, 0.00000000, -1.00000000, 1.00000000, -2.00000000\n" + "def c5, 2.00000000, 0.50000000, 0.25000000, 1.50000000\n" + "def c6, 
4.00000000, 12.00000000, 0.08333334, 0\n" + "def c7, -2.00000000, 3.00000000, 0, 0\n" + "dcl t0.xy\n" + "mov r0.zw, c0.xyxy\n" + "mad r3.xy, c4.zxzw, r0.zwzw, t0\n" + "texld r7, r3, s0\n" + "texld r1, t0, s0\n" + "mov r0.xy, c0\n" + "mad r0.xy, c4.yxzw, r0, t0\n" + "texld r8, r0, s0\n" + "mov r0.xy, c0\n" + "mad r0.xy, c4, r0, t0\n" + "texld r9, r0, s0\n" + "add r0.xy, t0, -c0\n" + "texld r5, r0, s0\n" + "mov r3.xy, c0\n" + "mad r3.xy, c4.zyzw, r3, t0\n" + "texld r3, r3, s0\n" + "add r7.x, r8.y, r7.y\n" + "mad r0.z, r1.y, c4.w, r7.x\n" + "add r0.x, r5.y, r3.y\n" + "mad r0.w, r9.y, c4, r0.x\n" + "mov r0.xy, c0\n" + "mad r0.xy, c4.xzzw, r0, t0\n" + "texld r6, r0, s0\n" + "add r5.x, r9.y, r6.y\n" + "abs r0.z, r0\n" + "abs r0.w, r0\n" + "mad r3.x, r0.z, c5, r0.w\n" + "mov r0.zw, c0.xyxy\n" + "mad r4.xy, c4.yzzw, r0.zwzw, t0\n" + "texld r4, r4, s0\n" + "add r0.xy, t0, c0\n" + "texld r0, r0, s0\n" + "add r4.x, r5.y, r4.y\n" + "add r5.y, r3, r0\n" + "add r0.x, r4.y, r0.y\n" + "mad r0.x, r6.y, c4.w, r0\n" + "abs r0.x, r0\n" + "add r0.w, r0.x, r3.x\n" + "mad r0.x, r8.y, c4.w, r4\n" + "mad r0.z, r7.y, c4.w, r5.y\n" + "mad r0.y, r1, c4.w, r5.x\n" + "abs r0.z, r0\n" + "abs r0.y, r0\n" + "mad r0.y, r0, c5.x, r0.z\n" + "abs r0.x, r0\n" + "add r0.x, r0, r0.y\n" + "add r0.x, r0, -r0.w\n" + "cmp r3.y, r0.x, c4.z, c4.x\n" + "max r0.y, r6, r1\n" + "max r0.z, r7.y, r0.y\n" + "max r0.y, r9, r8\n" + "max r0.y, r0, r0.z\n" + "min r0.z, r6.y, r1.y\n" + "min r0.w, r7.y, r0.z\n" + "min r0.z, r9.y, r8.y\n" + "min r0.z, r0, r0.w\n" + "mul r3.x, r0.y, c2\n" + "abs_pp r0.x, r3.y\n" + "add r4.y, r0, -r0.z\n" + "max r0.w, r3.x, c3.x\n" + "add r4.z, r4.y, -r0.w\n" + "cmp_pp r4.w, r4.z, c4.z, c4.x\n" + "mul_pp r5.w, r4, r3.y\n" + "cmp_pp r0.y, -r0.x, c4.z, c4.x\n" + "mul_pp r5.z, r4.w, r0.y\n" + "cmp_pp r3.x, -r0, c4, c4.z\n" + "cmp r6.w, -r5.z, r6.y, r7.y\n" + "cmp r7.w, -r5.z, r9.y, r8.y\n" + "add r0.z, -r1.y, r6.w\n" + "add r0.y, -r1, r7.w\n" + "abs r9.z, r0\n" + "abs r7.y, r0\n" + "add r0.y, 
r7, -r9.z\n" + "cmp r0.y, r0, c4.z, c4.x\n" + "max r7.y, r7, r9.z\n" + "mul_pp r0.z, r4.w, r0.y\n" + "cmp r0.w, -r5, c0.x, c0.y\n" + "cmp r6.x, -r0.z, r0.w, -r0.w\n" + "mov r0.z, c0.y\n" + "cmp r6.y, -r5.z, c4.x, r0.z\n" + "mad r0.w, r6.x, c5.y, t0.y\n" + "cmp r0.z, -r5.w, t0.y, r0.w\n" + "add r8.z, r0, r6.y\n" + "add r7.z, r0, -r6.y\n" + "mov r9.y, r7.z\n" + "mov r8.y, r8.z\n" + "mad r0.w, r6.x, c5.y, t0.x\n" + "mov r0.x, c0\n" + "mul_pp r3.x, r4.w, r3\n" + "cmp r6.z, -r3.x, c4.x, r0.x\n" + "cmp r0.x, -r5.z, t0, r0.w\n" + "add r9.x, r0, -r6.z\n" + "texld r3, r9, s0\n" + "add r8.x, r0, r6.z\n" + "abs_pp r3.x, r0.y\n" + "texld r0, r8, s0\n" + "cmp_pp r0.x, -r3, c4.z, c4\n" + "add r0.w, r1.y, r6\n" + "add r0.z, r1.y, r7.w\n" + "mul_pp r0.x, r4.w, r0\n" + "cmp r6.w, -r0.x, r0.z, r0\n" + "mad r7.w, -r6, c5.y, r0.y\n" + "mad r8.w, -r6, c5.y, r3.y\n" + "abs r0.y, r7.w\n" + "abs r0.x, r8.w\n" + "mad r0.x, -r7.y, c5.z, r0\n" + "mad r0.y, -r7, c5.z, r0\n" + "cmp r0.x, r0, c4.z, c4\n" + "abs_pp r0.x, r0\n" + "cmp_pp r9.z, -r0.x, c4, c4.x\n" + "cmp r0.y, r0, c4.z, c4.x\n" + "abs_pp r0.y, r0\n" + "cmp_pp r9.w, -r0.y, c4.z, c4.x\n" + "mul_pp r0.x, r4.w, r9.z\n" + "mad r0.y, -r6, c5.w, r7.z\n" + "cmp r7.z, -r0.x, r7, r0.y\n" + "mad r0.z, -r6, c5.w, r9.x\n" + "cmp r9.x, -r0, r9, r0.z\n" + "mov r9.y, r7.z\n" + "texld r3, r9, s0\n" + "add_pp_sat r3.z, r9, r9.w\n" + "mul_pp r0.x, r4.w, r9.w\n" + "mad r0.y, r6, c5.w, r8.z\n" + "cmp r3.x, -r0, r8.z, r0.y\n" + "mad r0.z, r6, c5.w, r8.x\n" + "mul_pp r8.z, r4.w, r3\n" + "cmp r8.x, -r0, r8, r0.z\n" + "mov r8.y, r3.x\n" + "texld r0, r8, s0\n" + "mul_pp r0.w, r8.z, r9\n" + "cmp r3.z, -r0.w, r7.w, r0.y\n" + "mul_pp r0.x, r8.z, r9.z\n" + "cmp r0.y, -r0.x, r8.w, r3\n" + "mad r0.z, -r6.w, c5.y, r0.y\n" + "cmp r8.w, -r0.x, r0.y, r0.z\n" + "mad r3.y, -r6.w, c5, r3.z\n" + "cmp r9.w, -r0, r3.z, r3.y\n" + "abs r0.y, r9.w\n" + "abs r0.x, r8.w\n" + "mad r0.y, -r7, c5.z, r0\n" + "mad r0.x, -r7.y, c5.z, r0\n" + "cmp r0.y, r0, c4.z, c4.x\n" + "abs_pp 
r0.y, r0\n" + "cmp_pp r10.x, -r0.y, c4.z, c4\n" + "cmp r0.x, r0, c4.z, c4\n" + "abs_pp r0.x, r0\n" + "cmp_pp r9.z, -r0.x, c4, c4.x\n" + "mul_pp r0.x, r8.z, r10\n" + "mad r0.y, r6, c5.x, r3.x\n" + "cmp r7.w, -r0.x, r3.x, r0.y\n" + "mad r0.z, r6, c5.x, r8.x\n" + "cmp r8.x, -r0, r8, r0.z\n" + "mov r8.y, r7.w\n" + "texld r0, r8, s0\n" + "mul_pp r0.w, r8.z, r9.z\n" + "mad r3.x, -r6.z, c5, r9\n" + "mad r0.x, -r6.y, c5, r7.z\n" + "cmp r0.x, -r0.w, r7.z, r0\n" + "add_pp_sat r0.z, r9, r10.x\n" + "mul_pp r7.z, r8, r0\n" + "cmp r9.x, -r0.w, r9, r3\n" + "mov r9.y, r0.x\n" + "texld r3, r9, s0\n" + "mul_pp r0.z, r7, r9\n" + "cmp r0.w, -r0.z, r8, r3.y\n" + "mul_pp r3.x, r7.z, r10\n" + "cmp r3.y, -r3.x, r9.w, r0\n" + "mad r0.y, -r6.w, c5, r0.w\n" + "cmp r8.z, -r0, r0.w, r0.y\n" + "mad r3.z, -r6.w, c5.y, r3.y\n" + "cmp r9.z, -r3.x, r3.y, r3\n" + "abs r0.y, r8.z\n" + "abs r0.z, r9\n" + "mad r0.y, -r7, c5.z, r0\n" + "mad r0.z, -r7.y, c5, r0\n" + "cmp r0.y, r0, c4.z, c4.x\n" + "abs_pp r0.y, r0\n" + "cmp_pp r8.w, -r0.y, c4.z, c4.x\n" + "cmp r0.z, r0, c4, c4.x\n" + "abs_pp r0.z, r0\n" + "cmp_pp r9.w, -r0.z, c4.z, c4.x\n" + "mul_pp r0.y, r7.z, r8.w\n" + "mad r0.z, -r6.y, c6.x, r0.x\n" + "cmp r10.x, -r0.y, r0, r0.z\n" + "mad r0.w, -r6.z, c6.x, r9.x\n" + "cmp r9.x, -r0.y, r9, r0.w\n" + "mov r9.y, r10.x\n" + "texld r3, r9, s0\n" + "mul_pp r0.x, r7.z, r9.w\n" + "mad r0.z, r6, c6.x, r8.x\n" + "mad r0.y, r6, c6.x, r7.w\n" + "cmp r3.x, -r0, r7.w, r0.y\n" + "cmp r8.x, -r0, r8, r0.z\n" + "mov r8.y, r3.x\n" + "texld r0, r8, s0\n" + "add_pp_sat r3.z, r8.w, r9.w\n" + "mul_pp r0.x, r7.z, r3.z\n" + "mul_pp r3.z, r0.x, r9.w\n" + "cmp r0.y, -r3.z, r9.z, r0\n" + "mul_pp r0.z, r0.x, r8.w\n" + "cmp r0.w, -r0.z, r8.z, r3.y\n" + "mad r3.w, -r6, c5.y, r0.y\n" + "cmp r0.y, -r3.z, r0, r3.w\n" + "mad r3.y, -r6.w, c5, r0.w\n" + "cmp r0.z, -r0, r0.w, r3.y\n" + "abs r3.y, r0\n" + "abs r0.w, r0.z\n" + "mad r3.y, -r7, c5.z, r3\n" + "mad r0.w, -r7.y, c5.z, r0\n" + "cmp r3.y, r3, c4.z, c4.x\n" + "abs_pp r3.y, r3\n" + 
"cmp r0.w, r0, c4.z, c4.x\n" + "cmp_pp r3.z, -r3.y, c4, c4.x\n" + "abs_pp r0.w, r0\n" + "cmp_pp r3.y, -r0.w, c4.z, c4.x\n" + "mul_pp r0.w, r0.x, r3.z\n" + "mul_pp r0.x, r0, r3.y\n" + "mad r3.w, r6.y, c6.y, r3.x\n" + "cmp r3.x, -r0.w, r3, r3.w\n" + "mad r3.z, r6, c6.y, r8.x\n" + "cmp r0.w, -r0, r8.x, r3.z\n" + "mad r3.y, -r6, c6, r10.x\n" + "cmp r3.y, -r0.x, r10.x, r3\n" + "add r3.x, -t0.y, r3\n" + "add r0.w, -t0.x, r0\n" + "cmp r0.w, -r5.z, r0, r3.x\n" + "mad r3.x, -r6.z, c6.y, r9\n" + "cmp r0.x, -r0, r9, r3\n" + "add r3.x, t0.y, -r3.y\n" + "add r0.x, t0, -r0\n" + "cmp r0.x, -r5.z, r0, r3\n" + "add r3.x, r0, -r0.w\n" + "add r3.y, r0.x, r0.w\n" + "cmp r3.x, r3, c4, c4.z\n" + "abs_pp r3.x, r3\n" + "min r0.x, r0, r0.w\n" + "cmp_pp r3.x, -r3, c4.z, c4\n" + "mul_pp r0.w, r4, r3.x\n" + "rcp r3.y, r3.y\n" + "mad r0.x, r0, -r3.y, c5.y\n" + "cmp r3.y, r0, c4.x, c4.z\n" + "mad r3.x, -r6.w, c5.y, r1.y\n" + "cmp r3.x, r3, c4, c4.z\n" + "cmp r0.y, r0.z, c4.x, c4.z\n" + "add_pp r0.z, -r3.x, r3.y\n" + "add_pp r0.y, r0, -r3.x\n" + "abs_pp r0.y, r0\n" + "abs_pp r0.z, r0\n" + "cmp_pp r0.z, -r0, c4.x, c4\n" + "cmp_pp r0.y, -r0, c4.x, c4.z\n" + "cmp_pp r0.y, -r0.w, r0, r0.z\n" + "abs_pp r0.y, r0\n" + "cmp_pp r0.y, -r0, c4.z, c4.x\n" + "mul_pp r0.y, r4.w, r0\n" + "rcp r0.w, r4.y\n" + "cmp r0.x, -r0.y, r0, c4\n" + "add r3.y, r4.x, r5\n" + "add r3.x, r5, r7\n" + "mad r3.x, r3, c5, r3.y\n" + "mad r0.z, r3.x, c6, -r1.y\n" + "abs r0.z, r0\n" + "mul_sat r0.z, r0, r0.w\n" + "mul r0.w, r0.z, r0.z\n" + "mad r0.z, r0, c7.x, c7.y\n" + "mul r0.z, r0, r0.w\n" + "mul r0.z, r0, r0\n" + "mul r0.z, r0, c1.x\n" + "max r0.x, r0, r0.z\n" + "mad r0.y, r0.x, r6.x, t0\n" + "mad r0.z, r0.x, r6.x, t0.x\n" + "cmp r0.x, -r5.z, t0, r0.z\n" + "cmp r0.y, -r5.w, t0, r0\n" + "texld r0, r0, s0\n" + "mov r0.w, r1.y\n" + "cmp r1, r4.z, r2, r1\n" + "cmp r0, -r4.w, r1, r0\n" + "mov oC0, r0\n"; + +const char *a_ps_2_0_ps3 = "ps_2_0\n" // cgc version 3.1.0013, build date Apr 18 2012 // command line args: -profile ps_2_0 -O3 
-fastmath -fastprecision diff --git a/code/nel/src/3d/shaders/compile.bat b/code/nel/src/3d/shaders/compile.bat index a1d660d9d..06306a0da 100644 --- a/code/nel/src/3d/shaders/compile.bat +++ b/code/nel/src/3d/shaders/compile.bat @@ -1,3 +1,3 @@ cgc -entry fxaa_pp fxaa_pp.cg -profile arbfp1 -O3 -fastmath -fastprecision -o fxaa_pp_arbfp1.txt -cgc -entry fxaa_pp fxaa_pp.cg -profile ps_2_0 -O3 -fastmath -fastprecision -o fxaa_pp_ps_2_0.txt +cgc -entry fxaa_pp fxaa_pp.cg -profile ps_2_x -O3 -fastmath -fastprecision -o fxaa_pp_ps_2_0.txt cgc -entry fxaa_vp fxaa_vp.cg -profile arbvp1 -fastmath -fastprecision -o fxaa_vp_arbvp1.txt \ No newline at end of file diff --git a/code/nel/src/3d/shaders/fxaa3_11.h b/code/nel/src/3d/shaders/fxaa3_11.h index 0443fd6e2..7cdc32c70 100644 --- a/code/nel/src/3d/shaders/fxaa3_11.h +++ b/code/nel/src/3d/shaders/fxaa3_11.h @@ -695,7 +695,11 @@ struct FxaaTex { SamplerState smpl; Texture2D tex; }; #define FxaaTexGreen4(t, p) t.tex.GatherGreen(t.smpl, p) #define FxaaTexOffGreen4(t, p, o) t.tex.GatherGreen(t.smpl, p, o) #endif - + +#undef FxaaTexTop +#define FxaaTexTop(t, p) tex2D(t, p) +#undef FxaaTexOff +#define FxaaTexOff(t, p, o, r) tex2D(t, p + (o * r)) /*============================================================================ GREEN AS LUMA OPTION SUPPORT FUNCTION @@ -726,7 +730,7 @@ FxaaFloat2 pos, // Use noperspective interpolation here (turn off perspective interpolation). // {xy__} = upper left of pixel // {__zw} = lower right of pixel -FxaaFloat4 fxaaConsolePosPos, +//FxaaFloat4 fxaaConsolePosPos, // // Input color texture. // {rgb_} = color in linear or perceptual color space @@ -738,13 +742,13 @@ FxaaTex tex, // For everything but 360, just use the same input here as for "tex". // For 360, same texture, just alias with a 2nd sampler. // This sampler needs to have an exponent bias of -1. 
-FxaaTex fxaaConsole360TexExpBiasNegOne, +//FxaaTex fxaaConsole360TexExpBiasNegOne, // // Only used on the optimized 360 version of FXAA Console. // For everything but 360, just use the same input here as for "tex". // For 360, same texture, just alias with a 3nd sampler. // This sampler needs to have an exponent bias of -2. -FxaaTex fxaaConsole360TexExpBiasNegTwo, +//FxaaTex fxaaConsole360TexExpBiasNegTwo, // // Only used on FXAA Quality. // This must be from a constant/uniform. @@ -762,7 +766,7 @@ FxaaFloat2 fxaaQualityRcpFrame, // {_y__} = -N/screenHeightInPixels // {__z_} = N/screenWidthInPixels // {___w} = N/screenHeightInPixels -FxaaFloat4 fxaaConsoleRcpFrameOpt, +//FxaaFloat4 fxaaConsoleRcpFrameOpt, // // Only used on FXAA Console. // Not used on 360, but used on PS3 and PC. @@ -771,7 +775,7 @@ FxaaFloat4 fxaaConsoleRcpFrameOpt, // {_y__} = -2.0/screenHeightInPixels // {__z_} = 2.0/screenWidthInPixels // {___w} = 2.0/screenHeightInPixels -FxaaFloat4 fxaaConsoleRcpFrameOpt2, +//FxaaFloat4 fxaaConsoleRcpFrameOpt2, // // Only used on FXAA Console. // Only used on 360 in place of fxaaConsoleRcpFrameOpt2. @@ -780,7 +784,7 @@ FxaaFloat4 fxaaConsoleRcpFrameOpt2, // {_y__} = 8.0/screenHeightInPixels // {__z_} = -4.0/screenWidthInPixels // {___w} = -4.0/screenHeightInPixels -FxaaFloat4 fxaaConsole360RcpFrameOpt2, +//FxaaFloat4 fxaaConsole360RcpFrameOpt2, // // Only used on FXAA Quality. // This used to be the FXAA_QUALITY__SUBPIX define. @@ -818,7 +822,7 @@ FxaaFloat fxaaQualityEdgeThreshold, // will appear very dark in the green channel! // Tune by looking at mostly non-green content, // then start at zero and increase until aliasing is a problem. -FxaaFloat fxaaQualityEdgeThresholdMin, +FxaaFloat fxaaQualityEdgeThresholdMin // // Only used on FXAA Console. // This used to be the FXAA_CONSOLE__EDGE_SHARPNESS define. @@ -832,7 +836,7 @@ FxaaFloat fxaaQualityEdgeThresholdMin, // 8.0 is sharper (default!!!) 
// 4.0 is softer // 2.0 is really soft (good only for vector graphics inputs) -FxaaFloat fxaaConsoleEdgeSharpness, +//FxaaFloat fxaaConsoleEdgeSharpness, // // Only used on FXAA Console. // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD define. @@ -846,7 +850,7 @@ FxaaFloat fxaaConsoleEdgeSharpness, // Other platforms can use other values. // 0.125 leaves less aliasing, but is softer (default!!!) // 0.25 leaves more aliasing, and is sharper -FxaaFloat fxaaConsoleEdgeThreshold, +//FxaaFloat fxaaConsoleEdgeThreshold, // // Only used on FXAA Console. // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD_MIN define. @@ -865,14 +869,14 @@ FxaaFloat fxaaConsoleEdgeThreshold, // will appear very dark in the green channel! // Tune by looking at mostly non-green content, // then start at zero and increase until aliasing is a problem. -FxaaFloat fxaaConsoleEdgeThresholdMin, +//FxaaFloat fxaaConsoleEdgeThresholdMin, // // Extra constants for 360 FXAA Console only. // Use zeros or anything else for other platforms. // These must be in physical constant registers and NOT immedates. // Immedates will result in compiler un-optimizing. 
// {xyzw} = float4(1.0, -1.0, 0.25, -0.25) -FxaaFloat4 fxaaConsole360ConstDir +//FxaaFloat4 fxaaConsole360ConstDir ) { /*--------------------------------------------------------------------------*/ FxaaFloat2 posM; diff --git a/code/nel/src/3d/shaders/fxaa_pp.cg b/code/nel/src/3d/shaders/fxaa_pp.cg index e4993ead8..49622f206 100644 --- a/code/nel/src/3d/shaders/fxaa_pp.cg +++ b/code/nel/src/3d/shaders/fxaa_pp.cg @@ -1,8 +1,9 @@ -#define FXAA_PS3 1 +#define FXAA_PC 1 #define FXAA_HLSL_3 1 #define FXAA_QUALITY__PRESET 12 #define FXAA_EARLY_EXIT 0 +#define FXAA_GREEN_AS_LUMA 1 #define h4tex2Dlod tex2Dlod #define half4 float4 @@ -15,17 +16,36 @@ void fxaa_pp( // Per fragment parameters float2 pos : TEXCOORD0, +#if (FXAA_PS3 == 1) float4 fxaaConsolePosPos : TEXCOORD1, +#endif // Fragment program constants +#if (FXAA_PC == 1) + uniform float2 fxaaQualityRcpFrame, + uniform float fxaaQualitySubpix, + uniform float fxaaQualityEdgeThreshold, + uniform float fxaaQualityEdgeThresholdMin, +#else uniform float4 fxaaConsoleRcpFrameOpt, uniform float4 fxaaConsoleRcpFrameOpt2, +#endif uniform sampler2D nlTex0 : TEX0, // Output color out float4 oCol : COLOR ) { +#if (FXAA_PC == 1) + oCol = FxaaPixelShader( + pos, + nlTex0, + fxaaQualityRcpFrame, + fxaaQualitySubpix, + fxaaQualityEdgeThreshold, + fxaaQualityEdgeThresholdMin + ); +#else oCol = FxaaPixelShader( pos, fxaaConsolePosPos, @@ -33,6 +53,7 @@ void fxaa_pp( fxaaConsoleRcpFrameOpt, fxaaConsoleRcpFrameOpt2 ); +#endif } /* diff --git a/code/nel/src/3d/shaders/fxaa_pp_arbfp1.txt b/code/nel/src/3d/shaders/fxaa_pp_arbfp1.txt index 73ecb767c..5a498ed61 100644 --- a/code/nel/src/3d/shaders/fxaa_pp_arbfp1.txt +++ b/code/nel/src/3d/shaders/fxaa_pp_arbfp1.txt @@ -7,70 +7,294 @@ OPTION ARB_precision_hint_fastest; #version 3.1.0.13 #profile arbfp1 #program fxaa_pp -#semantic fxaa_pp.fxaaConsoleRcpFrameOpt -#semantic fxaa_pp.fxaaConsoleRcpFrameOpt2 +#semantic fxaa_pp.fxaaQualityRcpFrame +#semantic fxaa_pp.fxaaQualitySubpix +#semantic 
fxaa_pp.fxaaQualityEdgeThreshold +#semantic fxaa_pp.fxaaQualityEdgeThresholdMin #semantic fxaa_pp.nlTex0 : TEX0 #var float2 pos : $vin.TEXCOORD0 : TEX0 : 0 : 1 -#var float4 fxaaConsolePosPos : $vin.TEXCOORD1 : TEX1 : 1 : 1 -#var float4 fxaaConsoleRcpFrameOpt : : c[0] : 2 : 1 -#var float4 fxaaConsoleRcpFrameOpt2 : : c[1] : 3 : 1 -#var sampler2D nlTex0 : TEX0 : texunit 0 : 4 : 1 -#var float4 oCol : $vout.COLOR : COL : 5 : 1 -#const c[2] = 0.125 0 -2 2 -#const c[3] = 0.001953125 0.5 -PARAM c[4] = { program.local[0..1], - { 0.125, 0, -2, 2 }, - { 0.001953125, 0.5 } }; +#var float2 fxaaQualityRcpFrame : : c[0] : 1 : 1 +#var float fxaaQualitySubpix : : c[1] : 2 : 1 +#var float fxaaQualityEdgeThreshold : : c[2] : 3 : 1 +#var float fxaaQualityEdgeThresholdMin : : c[3] : 4 : 1 +#var sampler2D nlTex0 : TEX0 : texunit 0 : 5 : 1 +#var float4 oCol : $vout.COLOR : COL : 6 : 1 +#const c[4] = 0 -1 1 -2 +#const c[5] = 2 0.5 0.25 1.5 +#const c[6] = 4 12 0.083333336 3 +PARAM c[7] = { program.local[0..3], + { 0, -1, 1, -2 }, + { 2, 0.5, 0.25, 1.5 }, + { 4, 12, 0.083333336, 3 } }; TEMP R0; TEMP R1; TEMP R2; TEMP R3; TEMP R4; TEMP R5; -TEX R1.w, fragment.texcoord[1].zyzw, texture[0], 2D; -ADD R0.x, R1.w, c[3]; -TEX R0.w, fragment.texcoord[1].xwzw, texture[0], 2D; -TEX R1.w, fragment.texcoord[1], texture[0], 2D; -ADD R0.y, -R0.x, R0.w; -ADD R0.z, R1.w, R0.y; -TEX R2.w, fragment.texcoord[1].zwzw, texture[0], 2D; -ADD R0.y, -R1.w, R0; -ADD R1.x, R2.w, R0.y; -ADD R1.y, R0.z, -R2.w; -MUL R2.xy, R1, R1; -ADD R0.y, R2.x, R2; -RSQ R0.y, R0.y; -MUL R2.xy, R0.y, R1; -MAD R3.xy, R2, c[0].zwzw, fragment.texcoord[0]; -ABS R0.z, R2.y; -ABS R0.y, R2.x; -MIN R0.y, R0, R0.z; -RCP R0.y, R0.y; -MUL R1.xy, R0.y, R2; -MUL R1.xy, R1, c[2].x; -MIN R1.xy, R1, c[2].w; -TEX R4, R3, texture[0], 2D; -MAD R2.xy, -R2, c[0].zwzw, fragment.texcoord[0]; -TEX R3, R2, texture[0], 2D; -ADD R3, R3, R4; -MAX R1.xy, R1, c[2].z; -MAD R2.xy, R1, c[1].zwzw, fragment.texcoord[0]; -MUL R5, R3, c[3].y; -MAD R1.xy, -R1, c[1].zwzw, 
fragment.texcoord[0]; -MIN R0.z, R0.x, R2.w; -MIN R0.y, R0.w, R1.w; -MIN R0.y, R0, R0.z; -MAX R0.z, R0.x, R2.w; -MAX R0.x, R0.w, R1.w; -MAX R0.x, R0, R0.z; -TEX R4, R2, texture[0], 2D; -TEX R3, R1, texture[0], 2D; -ADD R3, R3, R4; -MAD R3, R3, c[3].y, R5; -MUL R3, R3, c[3].y; -SLT R0.z, R0.x, R3.w; -SLT R0.x, R3.w, R0.y; -ADD_SAT R0.x, R0, R0.z; -CMP result.color, -R0.x, R5, R3; +TEMP R6; +TEMP R7; +TEMP R8; +TEMP R9; +MOV R3.xyz, c[4]; +MAD R2.zw, R3.xyyz, c[0].xyxy, fragment.texcoord[0].xyxy; +MAD R0.xy, R3, c[0], fragment.texcoord[0]; +MAD R1.xy, R3.zyzw, c[0], fragment.texcoord[0]; +TEX R5.y, R1, texture[0], 2D; +MAD R1.xy, R3.zxzw, c[0], fragment.texcoord[0]; +ADD R0.zw, fragment.texcoord[0].xyxy, -c[0].xyxy; +TEX R4.y, R0.zwzw, texture[0], 2D; +TEX R6.y, R2.zwzw, texture[0], 2D; +TEX R8, fragment.texcoord[0], texture[0], 2D; +TEX R1.y, R1, texture[0], 2D; +TEX R0.y, R0, texture[0], 2D; +ADD R0.z, R4.y, R5.y; +MAD R1.z, R0.y, c[4].w, R0; +MAD R0.zw, R3.xyyx, c[0].xyxy, fragment.texcoord[0].xyxy; +TEX R2.y, R0.zwzw, texture[0], 2D; +ADD R0.x, R2.y, R1.y; +ABS R0.w, R1.z; +ADD R1.zw, fragment.texcoord[0].xyxy, c[0].xyxy; +TEX R7.y, R1.zwzw, texture[0], 2D; +MAD R0.z, R8.y, c[4].w, R0.x; +ABS R0.z, R0; +MAD R2.x, R0.z, c[5], R0.w; +MAD R0.zw, R3.xyxz, c[0].xyxy, fragment.texcoord[0].xyxy; +TEX R3.y, R0.zwzw, texture[0], 2D; +ADD R0.z, R0.y, R3.y; +ADD R1.x, R6.y, R7.y; +MAD R0.w, R3.y, c[4], R1.x; +MAD R1.x, R8.y, c[4].w, R0.z; +ABS R0.w, R0; +ADD R2.x, R0.w, R2; +ADD R2.w, R4.y, R6.y; +ADD R0.w, R5.y, R7.y; +ABS R1.z, R1.x; +MAD R1.x, R1.y, c[4].w, R0.w; +ABS R1.w, R1.x; +MAD R1.x, R2.y, c[4].w, R2.w; +MAD R1.z, R1, c[5].x, R1.w; +ABS R1.x, R1; +ADD R1.x, R1, R1.z; +SGE R4.x, R1, R2; +MAX R1.x, R3.y, R8.y; +MAX R1.z, R1.y, R1.x; +MAX R1.x, R0.y, R2.y; +MAX R1.x, R1, R1.z; +MIN R1.z, R3.y, R8.y; +MIN R1.w, R1.y, R1.z; +MIN R1.z, R0.y, R2.y; +MIN R1.z, R1, R1.w; +MUL R2.x, R1, c[2]; +ADD R3.z, R1.x, -R1; +ABS R3.w, R4.x; +MAX R1.w, R2.x, c[3].x; +ADD R2.z, R3, 
-R1.w; +CMP R2.x, R2.z, c[4], c[4].z; +CMP R1.x, -R3.w, c[4], c[4].z; +MUL R3.w, R2.x, R1.x; +CMP R1.z, -R3.w, R1.y, R3.y; +ADD R1.y, -R8, R1.z; +CMP R1.w, -R3, R2.y, R0.y; +ADD R0.y, -R8, R1.w; +MUL R4.x, R2, R4; +CMP R3.y, -R3.w, c[0], R3.x; +ABS R4.w, R1.y; +ABS R4.z, R0.y; +SGE R0.y, R4.z, R4.w; +MUL R1.y, R2.x, R0; +ABS R0.y, R0; +CMP R4.y, -R0, c[4].x, c[4].z; +ABS R0.y, R1.x; +CMP R0.y, -R0, c[4].x, c[4].z; +MUL R1.x, R2, R0.y; +CMP R2.y, -R4.x, c[0], c[0].x; +CMP R2.y, -R1, -R2, R2; +MAD R1.y, R2, c[5], fragment.texcoord[0]; +CMP R5.z, -R4.x, R1.y, fragment.texcoord[0].y; +ADD R5.y, R5.z, -R3; +MAD R0.y, R2, c[5], fragment.texcoord[0].x; +CMP R3.x, -R1, c[0], R3; +CMP R6.x, -R3.w, R0.y, fragment.texcoord[0]; +ADD R5.w, R5.z, R3.y; +ADD R1.x, R6, -R3; +MOV R1.y, R5; +TEX R0.y, R1, texture[0], 2D; +MUL R1.y, R2.x, R4; +ADD R0.x, R0.z, R0; +ADD R0.w, R2, R0; +MAD R0.z, R0.x, c[5].x, R0.w; +ADD R1.w, R8.y, R1; +ADD R1.z, R8.y, R1; +CMP R4.y, -R1, R1.z, R1.w; +ADD R1.z, R6.x, R3.x; +MAD R5.x, -R4.y, c[5].y, R0.y; +MOV R1.w, R5; +TEX R0.y, R1.zwzw, texture[0], 2D; +MAX R1.w, R4.z, R4; +MAD R1.y, -R4, c[5], R0; +MUL R4.z, R1.w, c[5]; +ABS R0.y, R1; +SGE R1.w, R0.y, R4.z; +ABS R6.y, R5.x; +SGE R0.y, R6, R4.z; +ABS R1.w, R1; +CMP R6.y, -R1.w, c[4].x, c[4].z; +ABS R0.y, R0; +CMP R5.z, -R0.y, c[4].x, c[4]; +ADD_SAT R0.y, R5.z, R6; +MUL R4.w, R2.x, R0.y; +MUL R0.y, R2.x, R6; +MAD R1.w, R3.y, c[5], R5; +CMP R6.x, -R0.y, R1.w, R5.w; +MAD R6.z, R3.x, c[5].w, R1; +CMP R1.z, -R0.y, R6, R1; +MOV R1.w, R6.x; +TEX R0.y, R1.zwzw, texture[0], 2D; +MUL R1.w, R4, R6.y; +CMP R6.y, -R1.w, R0, R1; +MUL R0.y, R2.x, R5.z; +MAD R1.y, -R3, c[5].w, R5; +CMP R5.w, -R0.y, R1.y, R5.y; +MAD R6.z, -R3.x, c[5].w, R1.x; +CMP R1.x, -R0.y, R6.z, R1; +MOV R1.y, R5.w; +TEX R0.y, R1, texture[0], 2D; +MUL R5.y, R4.w, R5.z; +CMP R0.y, -R5, R0, R5.x; +MAD R5.x, -R4.y, c[5].y, R0.y; +CMP R5.z, -R5.y, R5.x, R0.y; +MAD R1.y, -R4, c[5], R6; +CMP R1.y, -R1.w, R1, R6; +ABS R1.w, R1.y; +SGE R1.w, R1, R4.z; 
+ABS R0.y, R5.z; +SGE R0.y, R0, R4.z; +ABS R1.w, R1; +CMP R6.y, -R1.w, c[4].x, c[4].z; +ABS R0.y, R0; +CMP R5.y, -R0, c[4].x, c[4].z; +ADD_SAT R0.y, R5, R6; +MUL R5.x, R4.w, R0.y; +MUL R0.y, R4.w, R6; +MAD R1.w, R3.y, c[5].x, R6.x; +CMP R6.x, -R0.y, R1.w, R6; +MAD R6.z, R3.x, c[5].x, R1; +CMP R1.z, -R0.y, R6, R1; +MOV R1.w, R6.x; +TEX R0.y, R1.zwzw, texture[0], 2D; +MUL R1.w, R5.x, R6.y; +CMP R6.y, -R1.w, R0, R1; +MUL R0.y, R4.w, R5; +MAD R1.y, -R3, c[5].x, R5.w; +CMP R4.w, -R0.y, R1.y, R5; +MAD R6.z, -R3.x, c[5].x, R1.x; +CMP R1.x, -R0.y, R6.z, R1; +MOV R1.y, R4.w; +TEX R0.y, R1, texture[0], 2D; +MUL R5.y, R5.x, R5; +CMP R0.y, -R5, R0, R5.z; +MAD R5.z, -R4.y, c[5].y, R0.y; +CMP R5.w, -R5.y, R5.z, R0.y; +MAD R1.y, -R4, c[5], R6; +CMP R1.y, -R1.w, R1, R6; +ABS R1.w, R1.y; +SGE R1.w, R1, R4.z; +ABS R1.w, R1; +CMP R6.y, -R1.w, c[4].x, c[4].z; +ABS R0.y, R5.w; +SGE R0.y, R0, R4.z; +ABS R0.y, R0; +CMP R5.y, -R0, c[4].x, c[4].z; +ADD_SAT R0.y, R5, R6; +MUL R5.z, R5.x, R0.y; +MUL R0.y, R5.x, R6; +MAD R1.w, R3.y, c[6].x, R6.x; +CMP R6.x, -R0.y, R1.w, R6; +MAD R6.z, R3.x, c[6].x, R1; +CMP R1.z, -R0.y, R6, R1; +MOV R1.w, R6.x; +TEX R0.y, R1.zwzw, texture[0], 2D; +MUL R1.w, R5.z, R6.y; +CMP R6.y, -R1.w, R0, R1; +MUL R0.y, R5.x, R5; +MAD R1.y, -R3, c[6].x, R4.w; +CMP R4.w, -R0.y, R1.y, R4; +MAD R5.x, -R3, c[6], R1; +CMP R1.x, -R0.y, R5, R1; +MOV R1.y, R4.w; +TEX R0.y, R1, texture[0], 2D; +MUL R1.y, R5.z, R5; +CMP R5.x, -R1.y, R0.y, R5.w; +MAD R5.y, -R4, c[5], R5.x; +CMP R1.y, -R1, R5, R5.x; +MAD R0.y, -R4, c[5], R6; +CMP R0.y, -R1.w, R0, R6; +ABS R5.x, R0.y; +ABS R1.w, R1.y; +SGE R1.w, R1, R4.z; +SGE R5.x, R5, R4.z; +ABS R4.z, R5.x; +ABS R1.w, R1; +CMP R4.z, -R4, c[4].x, c[4]; +CMP R1.w, -R1, c[4].x, c[4].z; +MUL R4.z, R5, R4; +MAD R5.y, R3.x, c[6], R1.z; +CMP R5.y, -R4.z, R5, R1.z; +MAD R5.x, R3.y, c[6].y, R6; +CMP R1.z, -R4, R5.x, R6.x; +MUL R1.w, R5.z, R1; +ADD R4.z, -fragment.texcoord[0].x, R5.y; +ADD R1.z, -fragment.texcoord[0].y, R1; +CMP R1.z, -R3.w, R1, R4; +MAD R4.z, 
-R3.x, c[6].y, R1.x; +MAD R3.x, -R3.y, c[6].y, R4.w; +CMP R3.y, -R1.w, R4.z, R1.x; +CMP R1.x, -R1.w, R3, R4.w; +ADD R1.w, fragment.texcoord[0].x, -R3.y; +ADD R1.x, fragment.texcoord[0].y, -R1; +CMP R1.x, -R3.w, R1, R1.w; +SLT R1.w, R1.x, R1.z; +ADD R3.x, R1, R1.z; +ABS R1.w, R1; +MIN R1.x, R1, R1.z; +CMP R1.w, -R1, c[4].x, c[4].z; +MUL R1.z, R2.x, R1.w; +RCP R3.x, R3.x; +MAD R1.x, R1, -R3, c[5].y; +MUL R1.w, R4.y, c[5].y; +SLT R3.x, R1.y, c[4]; +SLT R1.y, R8, R1.w; +SLT R0.y, R0, c[4].x; +ADD R0.y, R0, -R1; +ADD R1.y, -R1, R3.x; +ABS R0.y, R0; +ABS R1.y, R1; +CMP R0.y, -R0, c[4].z, c[4].x; +CMP R1.y, -R1, c[4].z, c[4].x; +CMP R0.x, -R1.z, R0.y, R1.y; +MAD R0.y, R0.z, c[6].z, -R8; +ABS R0.x, R0; +CMP R0.x, -R0, c[4], c[4].z; +MUL R0.x, R2, R0; +CMP R0.x, -R0, c[4], R1; +RCP R0.z, R3.z; +ABS R0.y, R0; +MUL_SAT R0.y, R0, R0.z; +MUL R0.z, R0.y, c[4].w; +ADD R0.z, R0, c[6].w; +MUL R0.y, R0, R0; +MUL R0.y, R0.z, R0; +MUL R0.y, R0, R0; +MUL R0.y, R0, c[1].x; +MAX R0.x, R0, R0.y; +MAD R0.y, R0.x, R2, fragment.texcoord[0]; +MAD R0.z, R0.x, R2.y, fragment.texcoord[0].x; +CMP R0.x, -R3.w, R0.z, fragment.texcoord[0]; +CMP R0.y, -R4.x, R0, fragment.texcoord[0]; +TEX R0.xyz, R0, texture[0], 2D; +CMP R1, R2.z, R8, R9; +MOV R0.w, R8.y; +CMP result.color, -R2.x, R0, R1; END -# 45 instructions, 6 R-regs +# 260 instructions, 10 R-regs diff --git a/code/nel/src/3d/shaders/fxaa_pp_ps_2_0.txt b/code/nel/src/3d/shaders/fxaa_pp_ps_2_0.txt index fcd16fcd0..de51eba42 100644 --- a/code/nel/src/3d/shaders/fxaa_pp_ps_2_0.txt +++ b/code/nel/src/3d/shaders/fxaa_pp_ps_2_0.txt @@ -1,92 +1,306 @@ -ps_2_0 +ps_2_x // cgc version 3.1.0013, build date Apr 18 2012 -// command line args: -profile ps_2_0 -O3 -fastmath -fastprecision +// command line args: -profile ps_2_x -O3 -fastmath -fastprecision // source file: fxaa_pp.cg //vendor NVIDIA Corporation //version 3.1.0.13 -//profile ps_2_0 +//profile ps_2_x //program fxaa_pp -//semantic fxaa_pp.fxaaConsoleRcpFrameOpt -//semantic 
fxaa_pp.fxaaConsoleRcpFrameOpt2 +//semantic fxaa_pp.fxaaQualityRcpFrame +//semantic fxaa_pp.fxaaQualitySubpix +//semantic fxaa_pp.fxaaQualityEdgeThreshold +//semantic fxaa_pp.fxaaQualityEdgeThresholdMin //semantic fxaa_pp.nlTex0 : TEX0 //var float2 pos : $vin.TEXCOORD0 : TEX0 : 0 : 1 -//var float4 fxaaConsolePosPos : $vin.TEXCOORD1 : TEX1 : 1 : 1 -//var float4 fxaaConsoleRcpFrameOpt : : c[0] : 2 : 1 -//var float4 fxaaConsoleRcpFrameOpt2 : : c[1] : 3 : 1 -//var sampler2D nlTex0 : TEX0 : texunit 0 : 4 : 1 -//var float4 oCol : $vout.COLOR : COL : 5 : 1 -//const c[2] = 0.001953125 0.125 2 -2 -//const c[3] = 0.5 0 1 +//var float2 fxaaQualityRcpFrame : : c[0] : 1 : 1 +//var float fxaaQualitySubpix : : c[1] : 2 : 1 +//var float fxaaQualityEdgeThreshold : : c[2] : 3 : 1 +//var float fxaaQualityEdgeThresholdMin : : c[3] : 4 : 1 +//var sampler2D nlTex0 : TEX0 : texunit 0 : 5 : 1 +//var float4 oCol : $vout.COLOR : COL : 6 : 1 +//const c[4] = 0 -1 1 -2 +//const c[5] = 2 0.5 0.25 1.5 +//const c[6] = 4 12 0.083333336 +//const c[7] = -2 3 dcl_2d s0 -def c2, 0.00195313, 0.12500000, 2.00000000, -2.00000000 -def c3, 0.50000000, 0.00000000, 1.00000000, 0 -dcl t1 +def c4, 0.00000000, -1.00000000, 1.00000000, -2.00000000 +def c5, 2.00000000, 0.50000000, 0.25000000, 1.50000000 +def c6, 4.00000000, 12.00000000, 0.08333334, 0 +def c7, -2.00000000, 3.00000000, 0, 0 dcl t0.xy -texld r5, t1, s0 -mov r1.y, t1.w -mov r1.x, t1.z -mov r2.xy, r1 -mov r0.y, t1.w -mov r0.x, t1 -mov r1.y, t1 -mov r1.x, t1.z -texld r1, r1, s0 -texld r0, r0, s0 -texld r6, r2, s0 -add r0.x, r1.w, c2 -add r2.x, -r0, r0.w -add r1.x, r5.w, r2 -add r2.z, r1.x, -r6.w -add r2.x, -r5.w, r2 -add r2.x, r6.w, r2 -mov r3.x, r2 -mov r3.y, r2.z -mov r2.y, r2.z -mov r1.y, r2.z -mov r1.x, r2 -mul r1.xy, r3, r1 -add r1.x, r1, r1.y -rsq r1.x, r1.x -mul r4.xy, r1.x, r2 -abs r2.x, r4.y -abs r1.x, r4 -min r1.x, r1, r2 -rcp r1.x, r1.x -mul r1.xy, r1.x, r4 -mul r1.xy, r1, c2.y -min r1.xy, r1, c2.z -max r2.xy, r1, c2.w -mov r1.y, c1.w -mov 
r1.x, c1.z -mad r3.xy, r2, r1, t0 -mov r1.y, c1.w -mov r1.x, c1.z -mad r5.xy, -r2, r1, t0 -mov r1.y, c0.w -mov r1.x, c0.z -mad r2.xy, -r4, r1, t0 -mov r1.y, c0.w -mov r1.x, c0.z -mad r1.xy, r4, r1, t0 -texld r4, r5, s0 +mov r0.zw, c0.xyxy +mad r3.xy, c4.zxzw, r0.zwzw, t0 +texld r7, r3, s0 +texld r1, t0, s0 +mov r0.xy, c0 +mad r0.xy, c4.yxzw, r0, t0 +texld r8, r0, s0 +mov r0.xy, c0 +mad r0.xy, c4, r0, t0 +texld r9, r0, s0 +add r0.xy, t0, -c0 +texld r5, r0, s0 +mov r3.xy, c0 +mad r3.xy, c4.zyzw, r3, t0 texld r3, r3, s0 -texld r1, r1, s0 -texld r2, r2, s0 -add r1, r2, r1 -mul r2, r1, c3.x -add r1, r4, r3 -max r3.x, r0, r6.w -mad r1, r1, c3.x, r2 -mul r4, r1, c3.x -max r1.x, r0.w, r5.w -max r1.x, r1, r3 -add r1.x, -r4.w, r1 -min r3.x, r0.w, r5.w -min r0.x, r0, r6.w -min r0.x, r3, r0 -add r0.x, r4.w, -r0 -cmp r1.x, r1, c3.y, c3.z -cmp r0.x, r0, c3.y, c3.z -add_pp_sat r0.x, r0, r1 -cmp r0, -r0.x, r4, r2 +add r7.x, r8.y, r7.y +mad r0.z, r1.y, c4.w, r7.x +add r0.x, r5.y, r3.y +mad r0.w, r9.y, c4, r0.x +mov r0.xy, c0 +mad r0.xy, c4.xzzw, r0, t0 +texld r6, r0, s0 +add r5.x, r9.y, r6.y +abs r0.z, r0 +abs r0.w, r0 +mad r3.x, r0.z, c5, r0.w +mov r0.zw, c0.xyxy +mad r4.xy, c4.yzzw, r0.zwzw, t0 +texld r4, r4, s0 +add r0.xy, t0, c0 +texld r0, r0, s0 +add r4.x, r5.y, r4.y +add r5.y, r3, r0 +add r0.x, r4.y, r0.y +mad r0.x, r6.y, c4.w, r0 +abs r0.x, r0 +add r0.w, r0.x, r3.x +mad r0.x, r8.y, c4.w, r4 +mad r0.z, r7.y, c4.w, r5.y +mad r0.y, r1, c4.w, r5.x +abs r0.z, r0 +abs r0.y, r0 +mad r0.y, r0, c5.x, r0.z +abs r0.x, r0 +add r0.x, r0, r0.y +add r0.x, r0, -r0.w +cmp r3.y, r0.x, c4.z, c4.x +max r0.y, r6, r1 +max r0.z, r7.y, r0.y +max r0.y, r9, r8 +max r0.y, r0, r0.z +min r0.z, r6.y, r1.y +min r0.w, r7.y, r0.z +min r0.z, r9.y, r8.y +min r0.z, r0, r0.w +mul r3.x, r0.y, c2 +abs_pp r0.x, r3.y +add r4.y, r0, -r0.z +max r0.w, r3.x, c3.x +add r4.z, r4.y, -r0.w +cmp_pp r4.w, r4.z, c4.z, c4.x +mul_pp r5.w, r4, r3.y +cmp_pp r0.y, -r0.x, c4.z, c4.x +mul_pp r5.z, r4.w, r0.y +cmp_pp r3.x, -r0, c4, 
c4.z +cmp r6.w, -r5.z, r6.y, r7.y +cmp r7.w, -r5.z, r9.y, r8.y +add r0.z, -r1.y, r6.w +add r0.y, -r1, r7.w +abs r9.z, r0 +abs r7.y, r0 +add r0.y, r7, -r9.z +cmp r0.y, r0, c4.z, c4.x +max r7.y, r7, r9.z +mul_pp r0.z, r4.w, r0.y +cmp r0.w, -r5, c0.x, c0.y +cmp r6.x, -r0.z, r0.w, -r0.w +mov r0.z, c0.y +cmp r6.y, -r5.z, c4.x, r0.z +mad r0.w, r6.x, c5.y, t0.y +cmp r0.z, -r5.w, t0.y, r0.w +add r8.z, r0, r6.y +add r7.z, r0, -r6.y +mov r9.y, r7.z +mov r8.y, r8.z +mad r0.w, r6.x, c5.y, t0.x +mov r0.x, c0 +mul_pp r3.x, r4.w, r3 +cmp r6.z, -r3.x, c4.x, r0.x +cmp r0.x, -r5.z, t0, r0.w +add r9.x, r0, -r6.z +texld r3, r9, s0 +add r8.x, r0, r6.z +abs_pp r3.x, r0.y +texld r0, r8, s0 +cmp_pp r0.x, -r3, c4.z, c4 +add r0.w, r1.y, r6 +add r0.z, r1.y, r7.w +mul_pp r0.x, r4.w, r0 +cmp r6.w, -r0.x, r0.z, r0 +mad r7.w, -r6, c5.y, r0.y +mad r8.w, -r6, c5.y, r3.y +abs r0.y, r7.w +abs r0.x, r8.w +mad r0.x, -r7.y, c5.z, r0 +mad r0.y, -r7, c5.z, r0 +cmp r0.x, r0, c4.z, c4 +abs_pp r0.x, r0 +cmp_pp r9.z, -r0.x, c4, c4.x +cmp r0.y, r0, c4.z, c4.x +abs_pp r0.y, r0 +cmp_pp r9.w, -r0.y, c4.z, c4.x +mul_pp r0.x, r4.w, r9.z +mad r0.y, -r6, c5.w, r7.z +cmp r7.z, -r0.x, r7, r0.y +mad r0.z, -r6, c5.w, r9.x +cmp r9.x, -r0, r9, r0.z +mov r9.y, r7.z +texld r3, r9, s0 +add_pp_sat r3.z, r9, r9.w +mul_pp r0.x, r4.w, r9.w +mad r0.y, r6, c5.w, r8.z +cmp r3.x, -r0, r8.z, r0.y +mad r0.z, r6, c5.w, r8.x +mul_pp r8.z, r4.w, r3 +cmp r8.x, -r0, r8, r0.z +mov r8.y, r3.x +texld r0, r8, s0 +mul_pp r0.w, r8.z, r9 +cmp r3.z, -r0.w, r7.w, r0.y +mul_pp r0.x, r8.z, r9.z +cmp r0.y, -r0.x, r8.w, r3 +mad r0.z, -r6.w, c5.y, r0.y +cmp r8.w, -r0.x, r0.y, r0.z +mad r3.y, -r6.w, c5, r3.z +cmp r9.w, -r0, r3.z, r3.y +abs r0.y, r9.w +abs r0.x, r8.w +mad r0.y, -r7, c5.z, r0 +mad r0.x, -r7.y, c5.z, r0 +cmp r0.y, r0, c4.z, c4.x +abs_pp r0.y, r0 +cmp_pp r10.x, -r0.y, c4.z, c4 +cmp r0.x, r0, c4.z, c4 +abs_pp r0.x, r0 +cmp_pp r9.z, -r0.x, c4, c4.x +mul_pp r0.x, r8.z, r10 +mad r0.y, r6, c5.x, r3.x +cmp r7.w, -r0.x, r3.x, r0.y +mad r0.z, r6, 
c5.x, r8.x +cmp r8.x, -r0, r8, r0.z +mov r8.y, r7.w +texld r0, r8, s0 +mul_pp r0.w, r8.z, r9.z +mad r3.x, -r6.z, c5, r9 +mad r0.x, -r6.y, c5, r7.z +cmp r0.x, -r0.w, r7.z, r0 +add_pp_sat r0.z, r9, r10.x +mul_pp r7.z, r8, r0 +cmp r9.x, -r0.w, r9, r3 +mov r9.y, r0.x +texld r3, r9, s0 +mul_pp r0.z, r7, r9 +cmp r0.w, -r0.z, r8, r3.y +mul_pp r3.x, r7.z, r10 +cmp r3.y, -r3.x, r9.w, r0 +mad r0.y, -r6.w, c5, r0.w +cmp r8.z, -r0, r0.w, r0.y +mad r3.z, -r6.w, c5.y, r3.y +cmp r9.z, -r3.x, r3.y, r3 +abs r0.y, r8.z +abs r0.z, r9 +mad r0.y, -r7, c5.z, r0 +mad r0.z, -r7.y, c5, r0 +cmp r0.y, r0, c4.z, c4.x +abs_pp r0.y, r0 +cmp_pp r8.w, -r0.y, c4.z, c4.x +cmp r0.z, r0, c4, c4.x +abs_pp r0.z, r0 +cmp_pp r9.w, -r0.z, c4.z, c4.x +mul_pp r0.y, r7.z, r8.w +mad r0.z, -r6.y, c6.x, r0.x +cmp r10.x, -r0.y, r0, r0.z +mad r0.w, -r6.z, c6.x, r9.x +cmp r9.x, -r0.y, r9, r0.w +mov r9.y, r10.x +texld r3, r9, s0 +mul_pp r0.x, r7.z, r9.w +mad r0.z, r6, c6.x, r8.x +mad r0.y, r6, c6.x, r7.w +cmp r3.x, -r0, r7.w, r0.y +cmp r8.x, -r0, r8, r0.z +mov r8.y, r3.x +texld r0, r8, s0 +add_pp_sat r3.z, r8.w, r9.w +mul_pp r0.x, r7.z, r3.z +mul_pp r3.z, r0.x, r9.w +cmp r0.y, -r3.z, r9.z, r0 +mul_pp r0.z, r0.x, r8.w +cmp r0.w, -r0.z, r8.z, r3.y +mad r3.w, -r6, c5.y, r0.y +cmp r0.y, -r3.z, r0, r3.w +mad r3.y, -r6.w, c5, r0.w +cmp r0.z, -r0, r0.w, r3.y +abs r3.y, r0 +abs r0.w, r0.z +mad r3.y, -r7, c5.z, r3 +mad r0.w, -r7.y, c5.z, r0 +cmp r3.y, r3, c4.z, c4.x +abs_pp r3.y, r3 +cmp r0.w, r0, c4.z, c4.x +cmp_pp r3.z, -r3.y, c4, c4.x +abs_pp r0.w, r0 +cmp_pp r3.y, -r0.w, c4.z, c4.x +mul_pp r0.w, r0.x, r3.z +mul_pp r0.x, r0, r3.y +mad r3.w, r6.y, c6.y, r3.x +cmp r3.x, -r0.w, r3, r3.w +mad r3.z, r6, c6.y, r8.x +cmp r0.w, -r0, r8.x, r3.z +mad r3.y, -r6, c6, r10.x +cmp r3.y, -r0.x, r10.x, r3 +add r3.x, -t0.y, r3 +add r0.w, -t0.x, r0 +cmp r0.w, -r5.z, r0, r3.x +mad r3.x, -r6.z, c6.y, r9 +cmp r0.x, -r0, r9, r3 +add r3.x, t0.y, -r3.y +add r0.x, t0, -r0 +cmp r0.x, -r5.z, r0, r3 +add r3.x, r0, -r0.w +add r3.y, r0.x, r0.w +cmp 
r3.x, r3, c4, c4.z +abs_pp r3.x, r3 +min r0.x, r0, r0.w +cmp_pp r3.x, -r3, c4.z, c4 +mul_pp r0.w, r4, r3.x +rcp r3.y, r3.y +mad r0.x, r0, -r3.y, c5.y +cmp r3.y, r0, c4.x, c4.z +mad r3.x, -r6.w, c5.y, r1.y +cmp r3.x, r3, c4, c4.z +cmp r0.y, r0.z, c4.x, c4.z +add_pp r0.z, -r3.x, r3.y +add_pp r0.y, r0, -r3.x +abs_pp r0.y, r0 +abs_pp r0.z, r0 +cmp_pp r0.z, -r0, c4.x, c4 +cmp_pp r0.y, -r0, c4.x, c4.z +cmp_pp r0.y, -r0.w, r0, r0.z +abs_pp r0.y, r0 +cmp_pp r0.y, -r0, c4.z, c4.x +mul_pp r0.y, r4.w, r0 +rcp r0.w, r4.y +cmp r0.x, -r0.y, r0, c4 +add r3.y, r4.x, r5 +add r3.x, r5, r7 +mad r3.x, r3, c5, r3.y +mad r0.z, r3.x, c6, -r1.y +abs r0.z, r0 +mul_sat r0.z, r0, r0.w +mul r0.w, r0.z, r0.z +mad r0.z, r0, c7.x, c7.y +mul r0.z, r0, r0.w +mul r0.z, r0, r0 +mul r0.z, r0, c1.x +max r0.x, r0, r0.z +mad r0.y, r0.x, r6.x, t0 +mad r0.z, r0.x, r6.x, t0.x +cmp r0.x, -r5.z, t0, r0.z +cmp r0.y, -r5.w, t0, r0 +texld r0, r0, s0 +mov r0.w, r1.y +cmp r1, r4.z, r2, r1 +cmp r0, -r4.w, r1, r0 mov oC0, r0