From cead54c32edfe7520f1a9be8e4e10c03b01f8127 Mon Sep 17 00:00:00 2001 From: hydrogendeuteride Date: Fri, 26 Dec 2025 14:29:59 +0900 Subject: [PATCH] OPTIM: shader optimization --- shaders/background_env.frag | 5 ++-- shaders/cloud_voxel_advect.comp | 25 +++++++++++------- shaders/clouds.frag | 39 ++++++++++++++------------- shaders/deferred_lighting.frag | 25 +++++++++++++----- shaders/deferred_lighting_nort.frag | 23 +++++++++++----- shaders/gbuffer.frag | 37 +++++++++++++++++--------- shaders/ibl_common.glsl | 37 ++++++++++++++------------ shaders/lighting_common.glsl | 41 ++++++++++++++++++++--------- shaders/mesh.frag | 41 ++++++++++++++++++----------- shaders/sky.comp | 10 ++++--- shaders/ssr.frag | 10 +++++-- shaders/ssr_rt.frag | 12 ++++++--- 12 files changed, 196 insertions(+), 109 deletions(-) diff --git a/shaders/background_env.frag b/shaders/background_env.frag index 079f6a2..cbe973f 100644 --- a/shaders/background_env.frag +++ b/shaders/background_env.frag @@ -14,9 +14,10 @@ void main() // Avoid per-pixel matrix inverses. With a perspective projection, a view-space ray can be // reconstructed directly from the projection diagonal and then rotated to world space. vec3 viewDir = normalize(vec3(ndc.x / sceneData.proj[0][0], ndc.y / sceneData.proj[1][1], -1.0)); - vec3 worldDir = normalize(transpose(mat3(sceneData.view)) * viewDir); + // view matrix is rigid-body => transpose(mat3(view)) preserves length for normalized vectors. + vec3 worldDir = transpose(mat3(sceneData.view)) * viewDir; - vec2 uv = dir_to_equirect(worldDir); + vec2 uv = dir_to_equirect_normalized(worldDir); // Sample a dedicated background environment map when available. // The engine binds iblBackground2D to a texture that may differ from the IBL specular map. 
vec3 col = textureLod(iblBackground2D, uv, 0.0).rgb; diff --git a/shaders/cloud_voxel_advect.comp b/shaders/cloud_voxel_advect.comp index b498266..7eb48c9 100644 --- a/shaders/cloud_voxel_advect.comp +++ b/shaders/cloud_voxel_advect.comp @@ -100,6 +100,7 @@ float sample_density_trilinear(vec3 uvw, int res) { uvw = clamp(uvw, vec3(0.0), vec3(1.0)); + int slice = res * res; float fres = float(res); vec3 g = uvw * (fres - 1.0); @@ -108,16 +109,23 @@ float sample_density_trilinear(vec3 uvw, int res) vec3 f = fract(g); ivec3 b1 = min(base + ivec3(1), ivec3(res - 1)); + ivec3 axisInc = b1 - base; // 0 or 1 per axis (b1 is base + 1 clamped to res - 1); not named 'step' so the GLSL built-in step() stays visible - float d000 = vox_in.density[idx3(ivec3(base.x, base.y, base.z), res)]; - float d100 = vox_in.density[idx3(ivec3(b1.x, base.y, base.z), res)]; - float d010 = vox_in.density[idx3(ivec3(base.x, b1.y, base.z), res)]; - float d110 = vox_in.density[idx3(ivec3(b1.x, b1.y, base.z), res)]; + int baseIndex = base.x + base.y * res + base.z * slice; + int dx = axisInc.x; + int dy = axisInc.y * res; + int dz = axisInc.z * slice; - float d001 = vox_in.density[idx3(ivec3(base.x, base.y, b1.z), res)]; - float d101 = vox_in.density[idx3(ivec3(b1.x, base.y, b1.z), res)]; - float d011 = vox_in.density[idx3(ivec3(base.x, b1.y, b1.z), res)]; - float d111 = vox_in.density[idx3(ivec3(b1.x, b1.y, b1.z), res)]; + float d000 = vox_in.density[baseIndex]; + float d100 = vox_in.density[baseIndex + dx]; + float d010 = vox_in.density[baseIndex + dy]; + float d110 = vox_in.density[baseIndex + dy + dx]; + + int baseIndexZ = baseIndex + dz; + float d001 = vox_in.density[baseIndexZ]; + float d101 = vox_in.density[baseIndexZ + dx]; + float d011 = vox_in.density[baseIndexZ + dy]; + float d111 = vox_in.density[baseIndexZ + dy + dx]; float x00 = mix(d000, d100, f.x); float x10 = mix(d010, d110, f.x); @@ -223,4 +231,3 @@ void main() vox_out.density[idx3(c, res)] = out_d; } - diff --git a/shaders/clouds.frag b/shaders/clouds.frag index 39a501a..78eb4f0 100644 --- a/shaders/clouds.frag +++ b/shaders/clouds.frag
@@ -52,11 +52,6 @@ bool intersectAABB(vec3 ro, vec3 rd, vec3 bmin, vec3 bmax, out float tmin, out f return tmax >= max(tmin, 0.0); } -int idx3(ivec3 c, int res) -{ - return c.x + c.y * res + c.z * res * res; -} - float sample_voxel_density(vec3 p, vec3 bmin, vec3 bmax) { vec3 uvw = (p - bmin) / (bmax - bmin); @@ -66,6 +61,7 @@ float sample_voxel_density(vec3 p, vec3 bmin, vec3 bmax) } int res = max(pc.misc.y, 1); + int slice = res * res; float fres = float(res); vec3 g = uvw * (fres - 1.0); @@ -74,16 +70,23 @@ float sample_voxel_density(vec3 p, vec3 bmin, vec3 bmax) vec3 f = fract(g); ivec3 b1 = min(base + ivec3(1), ivec3(res - 1)); + ivec3 axisInc = b1 - base; // 0 or 1 per axis (b1 is base + 1 clamped to res - 1); not named 'step' so the GLSL built-in step() stays visible - float d000 = voxel.density[idx3(ivec3(base.x, base.y, base.z), res)]; - float d100 = voxel.density[idx3(ivec3(b1.x, base.y, base.z), res)]; - float d010 = voxel.density[idx3(ivec3(base.x, b1.y, base.z), res)]; - float d110 = voxel.density[idx3(ivec3(b1.x, b1.y, base.z), res)]; + int baseIndex = base.x + base.y * res + base.z * slice; + int dx = axisInc.x; + int dy = axisInc.y * res; + int dz = axisInc.z * slice; - float d001 = voxel.density[idx3(ivec3(base.x, base.y, b1.z), res)]; - float d101 = voxel.density[idx3(ivec3(b1.x, base.y, b1.z), res)]; - float d011 = voxel.density[idx3(ivec3(base.x, b1.y, b1.z), res)]; - float d111 = voxel.density[idx3(ivec3(b1.x, b1.y, b1.z), res)]; + float d000 = voxel.density[baseIndex]; + float d100 = voxel.density[baseIndex + dx]; + float d010 = voxel.density[baseIndex + dy]; + float d110 = voxel.density[baseIndex + dy + dx]; + + int baseIndexZ = baseIndex + dz; + float d001 = voxel.density[baseIndexZ]; + float d101 = voxel.density[baseIndexZ + dx]; + float d011 = voxel.density[baseIndexZ + dy]; + float d111 = voxel.density[baseIndexZ + dy + dx]; float x00 = mix(d000, d100, f.x); float x10 = mix(d010, d110, f.x); @@ -103,11 +106,9 @@ void main() vec3 camPos = getCameraWorldPosition(); // Reconstruct a world-space ray for this pixel (Vulkan depth range 0..1).
- mat4 invViewProj = inverse(sceneData.viewproj); vec2 ndc = inUV * 2.0 - 1.0; - vec4 farH = invViewProj * vec4(ndc, 1.0, 1.0); - vec3 farP = farH.xyz / max(farH.w, 1e-6); - vec3 rd = normalize(farP - camPos); + vec3 viewDir = normalize(vec3(ndc.x / sceneData.proj[0][0], ndc.y / sceneData.proj[1][1], -1.0)); + vec3 rd = transpose(mat3(sceneData.view)) * viewDir; // Define a local-space cloud volume (optionally anchored to camera XZ). vec3 center = pc.volume_center_follow.xyz; @@ -180,7 +181,8 @@ void main() else { float cosTheta = clamp(dot(rd, Lsun), 0.0, 1.0); - float phase = 0.30 + 0.70 * pow(cosTheta, 4.0); // cheap forward-scatter bias + float cos2 = cosTheta * cosTheta; + float phase = 0.30 + 0.70 * (cos2 * cos2); // cheap forward-scatter bias vec3 light = ambCol * 0.25 + sunCol * phase; vec3 albedo = clamp(pc.scatter_params.rgb, vec3(0.0), vec3(1.0)); @@ -205,4 +207,3 @@ void main() vec3 outRgb = scatter + trans * baseColor; outColor = vec4(outRgb, 1.0); } - diff --git a/shaders/deferred_lighting.frag b/shaders/deferred_lighting.frag index 726d911..14fafa2 100644 --- a/shaders/deferred_lighting.frag +++ b/shaders/deferred_lighting.frag @@ -63,6 +63,15 @@ vec2(0.0281, -0.2468), vec2(-0.2104, 0.0573), vec2(0.1197, 0.0779), vec2(-0.0905, -0.1203) ); +// Precomputed per-tap weights: w = 1 - smoothstep(0, 0.65, length(POISSON_16[i])). +// (Rotation preserves length, so these are invariant.) 
+const float POISSON_16_WEIGHT[16] = float[16]( +0.46137072, 0.56308092, 0.37907144, 0.34930667, +0.17150249, 0.22669642, 0.16976301, 0.19912809, +0.20140948, 0.24589236, 0.18334537, 0.14418702, +0.67350789, 0.73787198, 0.87638682, 0.86392944 +); + // Compute primary cascade and an optional neighbor for cross-fade near borders struct CascadeMix { uint i0; uint i1; float w1; }; @@ -154,11 +163,13 @@ float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L) // Slope-based tiny baseline bias (cheap safety net) float NoL = max(dot(N, L), 0.0); float slopeBias = max(0.0006 * (1.0 - NoL), SHADOW_MIN_BIAS); + float currentBias = current + slopeBias; // Receiver-plane depth gradient in shadow UV space vec3 dndc_dx = dFdx(ndc); vec3 dndc_dy = dFdy(ndc); vec2 dz_duv = receiverPlaneDepthGradient(ndc, dndc_dx, dndc_dy); + vec2 abs_dz_duv = abs(dz_duv) * SHADOW_RPDB_SCALE; ivec2 dim = textureSize(shadowTex[ci], 0); vec2 texelSize = 1.0 / vec2(dim); @@ -179,16 +190,15 @@ float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L) vec2 pu = rot * POISSON_16[i]; vec2 off = pu * radius * texelSize;// uv-space offset of this tap - float pr = length(pu); - float w = 1.0 - smoothstep(0.0, 0.65, pr); + float w = POISSON_16_WEIGHT[i]; // precomputed 1 - smoothstep(0.0, 0.65, length(POISSON_16[i])); regenerate the table if POISSON_16 changes float mapD = texture(shadowTex[ci], suv + off).r; // Receiver-plane depth bias: conservative depth delta over this tap's offset // Approximate |Δz| ≈ |dz/du|*|Δu| + |dz/dv|*|Δv| - float rpdb = dot(abs(dz_duv), abs(off)) * SHADOW_RPDB_SCALE; + float rpdb = dot(abs_dz_duv, abs(off)); - float vis = step(mapD, current + slopeBias + rpdb); + float vis = step(mapD, currentBias + rpdb); visible += vis * w; wsum += w; @@ -366,7 +376,7 @@ void main(){ if (maxT > 0.01) { vec3 L = toL / maxT; - vec3 dir = normalize(sceneData.spotLights[i].direction_cos_outer.xyz); + vec3 dir = sceneData.spotLights[i].direction_cos_outer.xyz; // NOTE(review): normalize() removed - assumes the direction is uploaded unit-length; confirm on the CPU side float cosTheta = dot(-L, dir); if (cosTheta > sceneData.spotLights[i].direction_cos_outer.w) { @@ -399,11 +409,12 @@ void main(){
// Image-Based Lighting: split-sum approximation vec3 R = reflect(-V, N); + float NdotV = max(dot(N, V), 0.0); float levels = float(textureQueryLevels(iblSpec2D)); float lod = ibl_lod_from_roughness(roughness, levels); - vec2 uv = dir_to_equirect(R); + vec2 uv = dir_to_equirect_normalized(R); vec3 prefiltered = textureLod(iblSpec2D, uv, lod).rgb; - vec2 brdf = texture(iblBRDF, vec2(max(dot(N, V), 0.0), roughness)).rg; + vec2 brdf = texture(iblBRDF, vec2(NdotV, roughness)).rg; vec3 F0 = mix(vec3(0.04), albedo, metallic); vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y); vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N); diff --git a/shaders/deferred_lighting_nort.frag b/shaders/deferred_lighting_nort.frag index 80e61f3..04f5f72 100644 --- a/shaders/deferred_lighting_nort.frag +++ b/shaders/deferred_lighting_nort.frag @@ -55,6 +55,15 @@ vec2(0.0281, -0.2468), vec2(-0.2104, 0.0573), vec2(0.1197, 0.0779), vec2(-0.0905, -0.1203) ); +// Precomputed per-tap weights: w = 1 - smoothstep(0, 0.65, length(POISSON_16[i])). +// (Rotation preserves length, so these are invariant.) 
+const float POISSON_16_WEIGHT[16] = float[16]( +0.46137072, 0.56308092, 0.37907144, 0.34930667, +0.17150249, 0.22669642, 0.16976301, 0.19912809, +0.20140948, 0.24589236, 0.18334537, 0.14418702, +0.67350789, 0.73787198, 0.87638682, 0.86392944 +); + // Compute primary cascade and an optional neighbor for cross-fade near borders struct CascadeMix { uint i0; uint i1; float w1; }; @@ -146,11 +155,13 @@ float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L) // Slope-based tiny baseline bias (cheap safety net) float NoL = max(dot(N, L), 0.0); float slopeBias = max(0.0006 * (1.0 - NoL), SHADOW_MIN_BIAS); + float currentBias = current + slopeBias; // Receiver-plane depth gradient in shadow UV space vec3 dndc_dx = dFdx(ndc); vec3 dndc_dy = dFdy(ndc); vec2 dz_duv = receiverPlaneDepthGradient(ndc, dndc_dx, dndc_dy); + vec2 abs_dz_duv = abs(dz_duv) * SHADOW_RPDB_SCALE; ivec2 dim = textureSize(shadowTex[ci], 0); vec2 texelSize = 1.0 / vec2(dim); @@ -171,16 +182,15 @@ float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L) vec2 pu = rot * POISSON_16[i]; vec2 off = pu * radius * texelSize; // uv-space offset of this tap - float pr = length(pu); - float w = 1.0 - smoothstep(0.0, 0.65, pr); + float w = POISSON_16_WEIGHT[i]; float mapD = texture(shadowTex[ci], suv + off).r; // Receiver-plane depth bias: conservative depth delta over this tap's offset // Approximate |Δz| ≈ |dz/du|*|Δu| + |dz/dv|*|Δv| - float rpdb = dot(abs(dz_duv), abs(off)) * SHADOW_RPDB_SCALE; + float rpdb = dot(abs_dz_duv, abs(off)); - float vis = step(mapD, current + slopeBias + rpdb); + float vis = step(mapD, currentBias + rpdb); visible += vis * w; wsum += w; @@ -255,11 +265,12 @@ void main(){ // Image-Based Lighting: split-sum approximation vec3 R = reflect(-V, N); + float NdotV = max(dot(N, V), 0.0); float levels = float(textureQueryLevels(iblSpec2D)); float lod = ibl_lod_from_roughness(roughness, levels); - vec2 uv = dir_to_equirect(R); + vec2 uv = dir_to_equirect_normalized(R); vec3 
prefiltered = textureLod(iblSpec2D, uv, lod).rgb; - vec2 brdf = texture(iblBRDF, vec2(max(dot(N, V), 0.0), roughness)).rg; + vec2 brdf = texture(iblBRDF, vec2(NdotV, roughness)).rg; vec3 F0 = mix(vec3(0.04), albedo, metallic); vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y); vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N); diff --git a/shaders/gbuffer.frag b/shaders/gbuffer.frag index 2ef736e..ca773f8 100644 --- a/shaders/gbuffer.frag +++ b/shaders/gbuffer.frag @@ -54,16 +54,22 @@ void main() { // Normal mapping: decode tangent-space normal and transform to world space // Expect UNORM normal map; support BC5 (RG) by reconstructing Z from XY. - vec2 enc = texture(normalMap, inUV).xy * 2.0 - 1.0; - float normalScale = max(materialData.extra[0].x, 0.0); - enc *= normalScale; - float z2 = 1.0 - dot(enc, enc); - float nz = z2 > 0.0 ? sqrt(z2) : 0.0; - vec3 Nm = vec3(enc, nz); vec3 N = normalize(inNormal); - vec3 T = normalize(inTangent.xyz); - vec3 B = normalize(cross(N, T)) * inTangent.w; - vec3 Nw = normalize(T * Nm.x + B * Nm.y + N * Nm.z); + vec3 Nw = N; + + float normalScale = max(materialData.extra[0].x, 0.0); + if (normalScale > 0.0) + { + vec2 enc = texture(normalMap, inUV).xy * 2.0 - 1.0; + enc *= normalScale; + float z2 = 1.0 - dot(enc, enc); + float nz = z2 > 0.0 ? 
sqrt(z2) : 0.0; + vec3 Nm = vec3(enc, nz); + + vec3 T = normalize(inTangent.xyz); + vec3 B = normalize(cross(N, T)) * inTangent.w; + Nw = normalize(T * Nm.x + B * Nm.y + N * Nm.z); + } outPos = vec4(inWorldPos, 1.0); outNorm = vec4(Nw, roughness); @@ -72,15 +78,20 @@ void main() { // extra[0].y = AO strength, extra[0].z = hasAO flag (1 = use AO texture) float hasAO = materialData.extra[0].z; float aoStrength = clamp(materialData.extra[0].y, 0.0, 1.0); - float aoTex = texture(occlusionTex, inUV).r; float ao = 1.0; - if (hasAO > 0.5) + if (hasAO > 0.5 && aoStrength > 0.0) { + float aoTex = texture(occlusionTex, inUV).r; ao = 1.0 - aoStrength + aoStrength * aoTex; } + + vec3 emissive = vec3(0.0); vec3 emissiveFactor = materialData.extra[1].rgb; - vec3 emissiveTex = texture(emissiveTex, inUV).rgb; - vec3 emissive = emissiveTex * emissiveFactor; + if (any(greaterThan(emissiveFactor, vec3(0.0)))) + { + vec3 emissiveSample = texture(emissiveTex, inUV).rgb; + emissive = emissiveSample * emissiveFactor; + } outExtra = vec4(ao, emissive); outObjectID = PushConstants.objectID; } diff --git a/shaders/ibl_common.glsl b/shaders/ibl_common.glsl index 452e3bd..06307b7 100644 --- a/shaders/ibl_common.glsl +++ b/shaders/ibl_common.glsl @@ -17,34 +17,38 @@ vec3 sh_eval_irradiance(vec3 n) const float c2 = 1.0925484306; const float c3 = 0.3153915653; const float c4 = 0.5462742153; - float Y[9]; - Y[0] = c0; - Y[1] = c1 * y; - Y[2] = c1 * z; - Y[3] = c1 * x; - Y[4] = c2 * x * y; - Y[5] = c2 * y * z; - Y[6] = c3 * (3.0 * z * z - 1.0); - Y[7] = c2 * x * z; - Y[8] = c4 * (x * x - y * y); + + float x2 = x * x; + float y2 = y * y; + float z2 = z * z; + vec3 r = vec3(0.0); - for (int i = 0; i < 9; ++i) - { - r += iblSH.sh[i].rgb * Y[i]; - } + r += iblSH.sh[0].rgb * c0; + r += iblSH.sh[1].rgb * (c1 * y); + r += iblSH.sh[2].rgb * (c1 * z); + r += iblSH.sh[3].rgb * (c1 * x); + r += iblSH.sh[4].rgb * (c2 * x * y); + r += iblSH.sh[5].rgb * (c2 * y * z); + r += iblSH.sh[6].rgb * (c3 * (3.0 * z2 - 
1.0)); + r += iblSH.sh[7].rgb * (c2 * x * z); + r += iblSH.sh[8].rgb * (c4 * (x2 - y2)); return r; } // Map direction to equirectangular UV (same convention across shaders). -vec2 dir_to_equirect(vec3 d) +vec2 dir_to_equirect_normalized(vec3 d) { - d = normalize(d); float phi = atan(d.z, d.x); float theta = acos(clamp(d.y, -1.0, 1.0)); // 1/(2*pi) = 0.15915494309, 1/pi = 0.31830988618 return vec2(phi * 0.15915494309 + 0.5, theta * 0.31830988618); } +vec2 dir_to_equirect(vec3 d) +{ + return dir_to_equirect_normalized(normalize(d)); +} + // Helper for selecting mip LOD from roughness and available levels. // Uses roughness^2 to bias towards blurrier reflections at mid roughness. float ibl_lod_from_roughness(float roughness, float levels) @@ -55,4 +59,3 @@ float ibl_lod_from_roughness(float roughness, float levels) } #endif // IBL_COMMON_GLSL - diff --git a/shaders/lighting_common.glsl b/shaders/lighting_common.glsl index 5a1a0d7..77aa033 100644 --- a/shaders/lighting_common.glsl +++ b/shaders/lighting_common.glsl @@ -3,9 +3,16 @@ const float PI = 3.14159265359; +float pow5(float x) +{ + float x2 = x * x; + return x2 * x2 * x; +} + vec3 fresnelSchlick(float cosTheta, vec3 F0) { - return F0 + (1.0 - F0) * pow(1.0 - cosTheta, 5.0); + float m = clamp(1.0 - cosTheta, 0.0, 1.0); + return F0 + (1.0 - F0) * pow5(m); } float DistributionGGX(vec3 N, vec3 H, float roughness) @@ -42,19 +49,21 @@ vec3 evaluate_brdf(vec3 N, vec3 V, vec3 L, vec3 albedo, float roughness, float m { vec3 H = normalize(V + L); + float NdotV = max(dot(N, V), 0.0); + float NdotL = max(dot(N, L), 0.0); + vec3 F0 = mix(vec3(0.04), albedo, metallic); vec3 F = fresnelSchlick(max(dot(H, V), 0.0), F0); float NDF = DistributionGGX(N, H, roughness); float G = GeometrySmith(N, V, L, roughness); vec3 numerator = NDF * G * F; - float denom = 4.0 * max(dot(N, V), 0.0) * max(dot(N, L), 0.0); + float denom = 4.0 * NdotV * NdotL; vec3 specular = numerator / max(denom, 0.001); vec3 kS = F; vec3 kD = (1.0 - kS) * (1.0 - 
metallic); - float NdotL = max(dot(N, L), 0.0); return (kD * albedo / PI + specular) * NdotL; } @@ -62,16 +71,19 @@ vec3 eval_point_light(GPUPunctualLight light, vec3 pos, vec3 N, vec3 V, vec3 alb { vec3 lightPos = light.position_radius.xyz; float radius = max(light.position_radius.w, 0.0001); - vec3 L = lightPos - pos; - float dist = length(L); - if (dist <= 0.0001) + + vec3 toLight = lightPos - pos; + float dist2 = dot(toLight, toLight); + if (dist2 <= 1.0e-8) { return vec3(0.0); } - L /= dist; + float invDist = inversesqrt(dist2); + float dist = dist2 * invDist; + vec3 L = toLight * invDist; // Smooth falloff: inverse-square with soft clamp at radius - float att = 1.0 / max(dist * dist, 0.0001); + float att = 1.0 / max(dist2, 0.0001); // floor kept at the pre-patch 0.0001 (dist < 0.01) so inverse-square attenuation stays capped at 1e4 float x = clamp(dist / radius, 0.0, 1.0); float smth = (1.0 - x * x); smth *= smth; @@ -88,14 +100,17 @@ vec3 eval_spot_light(GPUSpotLight light, vec3 pos, vec3 N, vec3 V, vec3 albedo, float radius = max(light.position_radius.w, 0.0001); vec3 toLight = lightPos - pos; - float dist = length(toLight); - if (dist <= 0.0001) + float dist2 = dot(toLight, toLight); + if (dist2 <= 1.0e-8) { return vec3(0.0); } - vec3 L = toLight / dist; // surface -> light + float invDist = inversesqrt(dist2); + float dist = dist2 * invDist; + vec3 L = toLight * invDist; // surface -> light - vec3 dir = normalize(light.direction_cos_outer.xyz); // light -> forward + // direction_cos_outer.xyz is expected to be unit length (normalized on the CPU).
+ vec3 dir = light.direction_cos_outer.xyz; // light -> forward float cosOuter = light.direction_cos_outer.w; float cosInner = light.cone.x; float cosTheta = dot(-L, dir); // light -> surface vs light forward @@ -108,7 +123,7 @@ vec3 eval_spot_light(GPUSpotLight light, vec3 pos, vec3 N, vec3 V, vec3 albedo, spot *= spot; // Smooth falloff: inverse-square with soft clamp at radius - float att = 1.0 / max(dist * dist, 0.0001); + float att = 1.0 / max(dist2, 0.0001); // floor kept at the pre-patch 0.0001 (dist < 0.01) so inverse-square attenuation stays capped at 1e4 float x = clamp(dist / radius, 0.0, 1.0); float smth = (1.0 - x * x); smth *= smth; diff --git a/shaders/mesh.frag b/shaders/mesh.frag index 59d300a..9d9b4f9 100644 --- a/shaders/mesh.frag +++ b/shaders/mesh.frag @@ -44,16 +44,22 @@ void main() // Normal mapping path for forward/transparent pipeline // Expect UNORM normal map; support BC5 (RG) by reconstructing Z from XY. - vec2 enc = texture(normalMap, inUV).xy * 2.0 - 1.0; - float normalScale = max(materialData.extra[0].x, 0.0); - enc *= normalScale; - float z2 = 1.0 - dot(enc, enc); - float nz = z2 > 0.0 ? sqrt(z2) : 0.0; - vec3 Nm = vec3(enc, nz); vec3 Nn = normalize(inNormal); - vec3 T = normalize(inTangent.xyz); - vec3 B = normalize(cross(Nn, T)) * inTangent.w; - vec3 N = normalize(T * Nm.x + B * Nm.y + Nn * Nm.z); + vec3 N = Nn; + + float normalScale = max(materialData.extra[0].x, 0.0); + if (normalScale > 0.0) + { + vec2 enc = texture(normalMap, inUV).xy * 2.0 - 1.0; + enc *= normalScale; + float z2 = 1.0 - dot(enc, enc); + float nz = z2 > 0.0 ?
sqrt(z2) : 0.0; + vec3 Nm = vec3(enc, nz); + + vec3 T = normalize(inTangent.xyz); + vec3 B = normalize(cross(Nn, T)) * inTangent.w; + N = normalize(T * Nm.x + B * Nm.y + Nn * Nm.z); + } vec3 camPos = getCameraWorldPosition(); vec3 V = normalize(camPos - inWorldPos); @@ -78,11 +84,12 @@ void main() // IBL: specular from equirect 2D mips; diffuse from SH vec3 R = reflect(-V, N); + float NdotV = max(dot(N, V), 0.0); float levels = float(textureQueryLevels(iblSpec2D)); float lod = ibl_lod_from_roughness(roughness, levels); - vec2 uv = dir_to_equirect(R); + vec2 uv = dir_to_equirect_normalized(R); vec3 prefiltered = textureLod(iblSpec2D, uv, lod).rgb; - vec2 brdf = texture(iblBRDF, vec2(max(dot(N, V), 0.0), roughness)).rg; + vec2 brdf = texture(iblBRDF, vec2(NdotV, roughness)).rg; vec3 F0 = mix(vec3(0.04), albedo, metallic); vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y); vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N); @@ -91,17 +98,21 @@ void main() // extra[0].y = AO strength, extra[0].z = hasAO flag (1 = use AO texture) float hasAO = materialData.extra[0].z; float aoStrength = clamp(materialData.extra[0].y, 0.0, 1.0); - float aoTex = texture(occlusionTex, inUV).r; float ao = 1.0; - if (hasAO > 0.5) + if (hasAO > 0.5 && aoStrength > 0.0) { + float aoTex = texture(occlusionTex, inUV).r; ao = 1.0 - aoStrength + aoStrength * aoTex; } // Emissive from texture and factor + vec3 emissive = vec3(0.0); vec3 emissiveFactor = materialData.extra[1].rgb; - vec3 emissiveTex = texture(emissiveTex, inUV).rgb; - vec3 emissive = emissiveTex * emissiveFactor; + if (any(greaterThan(emissiveFactor, vec3(0.0)))) + { + vec3 emissiveSample = texture(emissiveTex, inUV).rgb; + emissive = emissiveSample * emissiveFactor; + } vec3 indirect = diffIBL + specIBL; vec3 color = direct + indirect * ao + emissive; diff --git a/shaders/sky.comp b/shaders/sky.comp index 4b466e1..c62388f 100644 --- a/shaders/sky.comp +++ b/shaders/sky.comp @@ -17,7 +17,12 @@ float NoisyStarField( 
in vec2 vSamplePos, float fThreshhold ) { float StarVal = Noise2d( vSamplePos ); if ( StarVal >= fThreshhold ) - StarVal = pow( (StarVal - fThreshhold)/(1.0 - fThreshhold), 6.0 ); + { + float t = (StarVal - fThreshhold) / (1.0 - fThreshhold); + float t2 = t * t; + float t4 = t2 * t2; + StarVal = t4 * t2; + } else StarVal = 0.0; return StarVal; @@ -57,7 +62,7 @@ void mainImage( out vec4 fragColor, in vec2 fragCoord ) // Stars with a slow crawl. float xRate = 0.2; float yRate = -0.06; - vec2 vSamplePos = fragCoord.xy + vec2( xRate * float( 1 ), yRate * float( 1 ) ); + vec2 vSamplePos = fragCoord.xy + vec2( xRate, yRate ); float StarVal = StableStarField( vSamplePos, StarFieldThreshhold ); vColor += vec3( StarVal ); @@ -80,4 +85,3 @@ void main() imageStore(image, texelCoord, color); } } - diff --git a/shaders/ssr.frag b/shaders/ssr.frag index b2c5d7d..b9fda50 100644 --- a/shaders/ssr.frag +++ b/shaders/ssr.frag @@ -20,6 +20,12 @@ vec3 getCameraWorldPosition() return -rot * T; // C = -R * T } +float pow5(float x) +{ + float x2 = x * x; + return x2 * x2 * x; +} + vec3 projectToScreenFromView(vec3 viewPos) { vec4 clip = sceneData.proj * vec4(viewPos, 1.0); @@ -65,7 +71,7 @@ void main() vec3 V = normalize(camPos - worldPos); vec3 R = reflect(-V, N); vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz; - vec3 viewDir = normalize((sceneData.view * vec4(R, 0.0)).xyz); + vec3 viewDir = (sceneData.view * vec4(R, 0.0)).xyz; float gloss = 1.0 - roughness; float F0 = mix(0.04, 1.0, metallic); @@ -125,7 +131,7 @@ void main() vec3 reflColor = texture(hdrColor, hitUV).rgb; float NoV = clamp(dot(N, V), 0.0, 1.0); - float F = F0 + (1.0 - F0) * pow(1.0 - NoV, 5.0); // Schlick + float F = F0 + (1.0 - F0) * pow5(1.0 - NoV); // Schlick float ssrVisibility = gloss; float weight = clamp(F * ssrVisibility, 0.0, 1.0); diff --git a/shaders/ssr_rt.frag b/shaders/ssr_rt.frag index cb66428..ccb3387 100644 --- a/shaders/ssr_rt.frag +++ b/shaders/ssr_rt.frag @@ -25,6 +25,12 @@ vec3 
getCameraWorldPosition() return -rot * T; // C = -R * T } +float pow5(float x) +{ + float x2 = x * x; + return x2 * x2 * x; +} + vec3 projectToScreenFromView(vec3 viewPos) { vec4 clip = sceneData.proj * vec4(viewPos, 1.0); @@ -72,7 +78,7 @@ void main() vec3 V = normalize(camPos - worldPos); vec3 R = reflect(-V, N); vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz; - vec3 viewDir = normalize((sceneData.view * vec4(R, 0.0)).xyz); + vec3 viewDir = (sceneData.view * vec4(R, 0.0)).xyz; float gloss = 1.0 - roughness; float F0 = mix(0.04, 1.0, metallic); @@ -146,7 +152,7 @@ void main() vec3 reflColor = texture(hdrColor, ssrUV).rgb; float NoV = clamp(dot(N, V), 0.0, 1.0); - float F = F0 + (1.0 - F0) * pow(1.0 - NoV, 5.0); // Schlick + float F = F0 + (1.0 - F0) * pow5(1.0 - NoV); // Schlick float ssrVisibility = gloss; float weight = clamp(F * ssrVisibility, 0.0, 1.0); @@ -198,7 +204,7 @@ void main() vec3 reflColor = texture(hdrColor, hitUV).rgb; float NoV = clamp(dot(N, V), 0.0, 1.0); - float F = F0 + (1.0 - F0) * pow(1.0 - NoV, 5.0); // Schlick + float F = F0 + (1.0 - F0) * pow5(1.0 - NoV); // Schlick float rtVisibility = gloss; float weight = clamp(F * rtVisibility, 0.0, 1.0);