Includes:
#define OUTPUT_SHADER_INSTRUCTION_COUNT
#ifdef OUTPUT_SHADER_INSTRUCTION_COUNT
#define INITGUID
#include "D3D11Shader.h"
#include "D3Dcompiler.h"
#include "GHDebugMessage.h"
#endif
After loading the shader buffer:
#ifdef OUTPUT_SHADER_INSTRUCTION_COUNTWe have a very unoptimized experimental shader. I ran some instruction count measurements on some simple changes.
ID3D11ShaderReflection* pReflector = NULL;
D3DReflect(fileBuf, fileLen, IID_ID3D11ShaderReflection, (void**)&pReflector);
D3D11_SHADER_DESC shaderDesc;
pReflector->GetDesc(&shaderDesc);
GHDebugMessage::outputString("Shader %s instruction count %d", shaderName, shaderDesc.InstructionCount);
#endif
Starting instruction count: 272
Ending instruction count: 235
Instructions saved: 37
Adding lerp:
Shader sbterrainpixel.cso instruction count 270(saves 2 instructions)
old:
float4 color;new:
color.w = 1.0;
color.xyz = (cliffColor.xyz*blendColor.y) + (groundColor.xyz*blendColor.x);
float4 color = lerp(cliffColor, groundColor, blendColor.x);
Swizzle:
No difference with or without the swizzle.color.xyz *= blendColor.z;
color *= blendColor.z
Vector Ops:
Shader sbterrainpixel.cso instruction count 262(saves 1 instruction per function call)
old:
float2 offsetUV = float2(offsetShadProj.x / offsetShadProj.w, 1.0f - offsetShadProj.y / offsetShadProj.w);
return tex.Sample(samp, offsetUV).x;
new:
float2 offsetUV = offsetShadProj.xy / offsetShadProj.w;
offsetUV.y = 1.0 - offsetUV.y;
return tex.Sample(samp, offsetUV).x;
More Vector Ops:
Shader sbterrainpixel.cso instruction count 246(saves 4 instructions per function call)
old:
const float xPixelOffset = 0.0015;
const float yPixelOffset = 0.0015;
float4 offsetShadProj = shadowPos + float4(offSet.x * xPixelOffset,
offSet.y * yPixelOffset, 0.0, 0.0);
new:
const float2 pixelOffset = float2(0.002, 0.002);
float2 multOffset = offSet.xy * pixelOffset;
float4 offsetShadProj = shadowPos + float4(multOffset.xy, 0.0, 0.0);
Again Vector Ops:
Shader sbterrainpixel.cso instruction count 245(saves 1 instruction)
old:
float4 trailColor = TrailTexture.Sample(TrailTextureSampler, float2(input.trailPos.x / input.trailPos.w, 1.0 - input.trailPos.y / input.trailPos.w));
new:
float2 trailUV = input.trailPos.xy / input.trailPos.w;
trailUV.y = 1.0 - trailUV.y;
float4 trailColor = TrailTexture.Sample(TrailTextureSampler, trailUV);
Bad Code:
Shader sbterrainpixel.cso instruction count 239(saves 6 instructions)
old:
float shadInBorder = saturate(step(0.95, shadCenter.x) + step(0.95, shadCenter.y) +
step(shadCenter.x, 0.05) + step(shadCenter.y, 0.05));
// todo: more efficient.
color.xyz -= (color.xyz*(1.0-shadTot) * (1.0-shadInBorder));
new:
// if we're on the border, come up with a value bigger than 1.
float shadInBorder = step(0.95, shadCenter.x) + step(0.95, shadCenter.y) +
step(shadCenter.x, 0.05) + step(shadCenter.y, 0.05);
// multiply color by the shadow value, unless we are on the border.
color.xyz *= saturate(shadTot + shadInBorder);
More Bad Code:
Shader sbterrainpixel.cso instruction count 235(saves 4 instructions)
old:
float shadInWideBorder = saturate(step(0.95, wideShadowUV.x) + step(0.95, wideShadowUV.y) + step(wideShadowUV.x, 0.05) + step(wideShadowUV.y, 0.05));
color.xyz -= (color.xyz*(1.0 - wideshadTot)) * shadInBorder * (1.0-shadInWideBorder);
new:
// apply wide shadow if we are not in the wide border and are in the short border.
float shadInWideBorder = step(0.95, wideShadowUV.x) + step(0.95, wideShadowUV.y) + step(wideShadowUV.x, 0.05) + step(wideShadowUV.y, 0.05);
color.xyz *= saturate(wideshadTot + shadInWideBorder + step(shadInBorder, 0.9));