OPTIM: shader optimization

This commit is contained in:
2025-12-26 14:29:59 +09:00
parent d6216b20fc
commit cead54c32e
12 changed files with 196 additions and 109 deletions

View File

@@ -14,9 +14,10 @@ void main()
// Avoid per-pixel matrix inverses. With a perspective projection, a view-space ray can be // Avoid per-pixel matrix inverses. With a perspective projection, a view-space ray can be
// reconstructed directly from the projection diagonal and then rotated to world space. // reconstructed directly from the projection diagonal and then rotated to world space.
vec3 viewDir = normalize(vec3(ndc.x / sceneData.proj[0][0], ndc.y / sceneData.proj[1][1], -1.0)); vec3 viewDir = normalize(vec3(ndc.x / sceneData.proj[0][0], ndc.y / sceneData.proj[1][1], -1.0));
vec3 worldDir = normalize(transpose(mat3(sceneData.view)) * viewDir); // view matrix is rigid-body => transpose(mat3(view)) preserves length for normalized vectors.
vec3 worldDir = transpose(mat3(sceneData.view)) * viewDir;
vec2 uv = dir_to_equirect(worldDir); vec2 uv = dir_to_equirect_normalized(worldDir);
// Sample a dedicated background environment map when available. // Sample a dedicated background environment map when available.
// The engine binds iblBackground2D to a texture that may differ from the IBL specular map. // The engine binds iblBackground2D to a texture that may differ from the IBL specular map.
vec3 col = textureLod(iblBackground2D, uv, 0.0).rgb; vec3 col = textureLod(iblBackground2D, uv, 0.0).rgb;

View File

@@ -100,6 +100,7 @@ float sample_density_trilinear(vec3 uvw, int res)
{ {
uvw = clamp(uvw, vec3(0.0), vec3(1.0)); uvw = clamp(uvw, vec3(0.0), vec3(1.0));
int slice = res * res;
float fres = float(res); float fres = float(res);
vec3 g = uvw * (fres - 1.0); vec3 g = uvw * (fres - 1.0);
@@ -108,16 +109,23 @@ float sample_density_trilinear(vec3 uvw, int res)
vec3 f = fract(g); vec3 f = fract(g);
ivec3 b1 = min(base + ivec3(1), ivec3(res - 1)); ivec3 b1 = min(base + ivec3(1), ivec3(res - 1));
ivec3 step = b1 - base; // 0 or 1 per axis
float d000 = vox_in.density[idx3(ivec3(base.x, base.y, base.z), res)]; int baseIndex = base.x + base.y * res + base.z * slice;
float d100 = vox_in.density[idx3(ivec3(b1.x, base.y, base.z), res)]; int dx = step.x;
float d010 = vox_in.density[idx3(ivec3(base.x, b1.y, base.z), res)]; int dy = step.y * res;
float d110 = vox_in.density[idx3(ivec3(b1.x, b1.y, base.z), res)]; int dz = step.z * slice;
float d001 = vox_in.density[idx3(ivec3(base.x, base.y, b1.z), res)]; float d000 = vox_in.density[baseIndex];
float d101 = vox_in.density[idx3(ivec3(b1.x, base.y, b1.z), res)]; float d100 = vox_in.density[baseIndex + dx];
float d011 = vox_in.density[idx3(ivec3(base.x, b1.y, b1.z), res)]; float d010 = vox_in.density[baseIndex + dy];
float d111 = vox_in.density[idx3(ivec3(b1.x, b1.y, b1.z), res)]; float d110 = vox_in.density[baseIndex + dy + dx];
int baseIndexZ = baseIndex + dz;
float d001 = vox_in.density[baseIndexZ];
float d101 = vox_in.density[baseIndexZ + dx];
float d011 = vox_in.density[baseIndexZ + dy];
float d111 = vox_in.density[baseIndexZ + dy + dx];
float x00 = mix(d000, d100, f.x); float x00 = mix(d000, d100, f.x);
float x10 = mix(d010, d110, f.x); float x10 = mix(d010, d110, f.x);
@@ -223,4 +231,3 @@ void main()
vox_out.density[idx3(c, res)] = out_d; vox_out.density[idx3(c, res)] = out_d;
} }

View File

@@ -52,11 +52,6 @@ bool intersectAABB(vec3 ro, vec3 rd, vec3 bmin, vec3 bmax, out float tmin, out f
return tmax >= max(tmin, 0.0); return tmax >= max(tmin, 0.0);
} }
// Flattens a 3D cell coordinate into a linear index for a cubic grid with
// `res` cells per axis: x varies fastest, then y, then z.
int idx3(ivec3 c, int res)
{
    int rowOffset = c.y * res;
    int sliceOffset = c.z * res * res;
    return c.x + rowOffset + sliceOffset;
}
float sample_voxel_density(vec3 p, vec3 bmin, vec3 bmax) float sample_voxel_density(vec3 p, vec3 bmin, vec3 bmax)
{ {
vec3 uvw = (p - bmin) / (bmax - bmin); vec3 uvw = (p - bmin) / (bmax - bmin);
@@ -66,6 +61,7 @@ float sample_voxel_density(vec3 p, vec3 bmin, vec3 bmax)
} }
int res = max(pc.misc.y, 1); int res = max(pc.misc.y, 1);
int slice = res * res;
float fres = float(res); float fres = float(res);
vec3 g = uvw * (fres - 1.0); vec3 g = uvw * (fres - 1.0);
@@ -74,16 +70,23 @@ float sample_voxel_density(vec3 p, vec3 bmin, vec3 bmax)
vec3 f = fract(g); vec3 f = fract(g);
ivec3 b1 = min(base + ivec3(1), ivec3(res - 1)); ivec3 b1 = min(base + ivec3(1), ivec3(res - 1));
ivec3 step = b1 - base; // 0 or 1 per axis
float d000 = voxel.density[idx3(ivec3(base.x, base.y, base.z), res)]; int baseIndex = base.x + base.y * res + base.z * slice;
float d100 = voxel.density[idx3(ivec3(b1.x, base.y, base.z), res)]; int dx = step.x;
float d010 = voxel.density[idx3(ivec3(base.x, b1.y, base.z), res)]; int dy = step.y * res;
float d110 = voxel.density[idx3(ivec3(b1.x, b1.y, base.z), res)]; int dz = step.z * slice;
float d001 = voxel.density[idx3(ivec3(base.x, base.y, b1.z), res)]; float d000 = voxel.density[baseIndex];
float d101 = voxel.density[idx3(ivec3(b1.x, base.y, b1.z), res)]; float d100 = voxel.density[baseIndex + dx];
float d011 = voxel.density[idx3(ivec3(base.x, b1.y, b1.z), res)]; float d010 = voxel.density[baseIndex + dy];
float d111 = voxel.density[idx3(ivec3(b1.x, b1.y, b1.z), res)]; float d110 = voxel.density[baseIndex + dy + dx];
int baseIndexZ = baseIndex + dz;
float d001 = voxel.density[baseIndexZ];
float d101 = voxel.density[baseIndexZ + dx];
float d011 = voxel.density[baseIndexZ + dy];
float d111 = voxel.density[baseIndexZ + dy + dx];
float x00 = mix(d000, d100, f.x); float x00 = mix(d000, d100, f.x);
float x10 = mix(d010, d110, f.x); float x10 = mix(d010, d110, f.x);
@@ -103,11 +106,9 @@ void main()
vec3 camPos = getCameraWorldPosition(); vec3 camPos = getCameraWorldPosition();
// Reconstruct a world-space ray for this pixel (Vulkan depth range 0..1). // Reconstruct a world-space ray for this pixel (Vulkan depth range 0..1).
mat4 invViewProj = inverse(sceneData.viewproj);
vec2 ndc = inUV * 2.0 - 1.0; vec2 ndc = inUV * 2.0 - 1.0;
vec4 farH = invViewProj * vec4(ndc, 1.0, 1.0); vec3 viewDir = normalize(vec3(ndc.x / sceneData.proj[0][0], ndc.y / sceneData.proj[1][1], -1.0));
vec3 farP = farH.xyz / max(farH.w, 1e-6); vec3 rd = transpose(mat3(sceneData.view)) * viewDir;
vec3 rd = normalize(farP - camPos);
// Define a local-space cloud volume (optionally anchored to camera XZ). // Define a local-space cloud volume (optionally anchored to camera XZ).
vec3 center = pc.volume_center_follow.xyz; vec3 center = pc.volume_center_follow.xyz;
@@ -180,7 +181,8 @@ void main()
else else
{ {
float cosTheta = clamp(dot(rd, Lsun), 0.0, 1.0); float cosTheta = clamp(dot(rd, Lsun), 0.0, 1.0);
float phase = 0.30 + 0.70 * pow(cosTheta, 4.0); // cheap forward-scatter bias float cos2 = cosTheta * cosTheta;
float phase = 0.30 + 0.70 * (cos2 * cos2); // cheap forward-scatter bias
vec3 light = ambCol * 0.25 + sunCol * phase; vec3 light = ambCol * 0.25 + sunCol * phase;
vec3 albedo = clamp(pc.scatter_params.rgb, vec3(0.0), vec3(1.0)); vec3 albedo = clamp(pc.scatter_params.rgb, vec3(0.0), vec3(1.0));
@@ -205,4 +207,3 @@ void main()
vec3 outRgb = scatter + trans * baseColor; vec3 outRgb = scatter + trans * baseColor;
outColor = vec4(outRgb, 1.0); outColor = vec4(outRgb, 1.0);
} }

View File

@@ -63,6 +63,15 @@ vec2(0.0281, -0.2468), vec2(-0.2104, 0.0573),
vec2(0.1197, 0.0779), vec2(-0.0905, -0.1203) vec2(0.1197, 0.0779), vec2(-0.0905, -0.1203)
); );
// Per-tap filter weights for the POISSON_16 kernel, baked as constants:
//   POISSON_16_WEIGHT[i] = 1.0 - smoothstep(0.0, 0.65, length(POISSON_16[i]))
// The shader rotates each tap before use; a rotation does not change a tap's
// length, so the weight depends only on the tap index.
// NOTE(review): this relies on the per-pixel `rot` matrix being a pure rotation - confirm.
const float POISSON_16_WEIGHT[16] = float[16](
    // taps 0..7
    0.46137072, 0.56308092, 0.37907144, 0.34930667, 0.17150249, 0.22669642, 0.16976301, 0.19912809,
    // taps 8..15
    0.20140948, 0.24589236, 0.18334537, 0.14418702, 0.67350789, 0.73787198, 0.87638682, 0.86392944
);
// Compute primary cascade and an optional neighbor for cross-fade near borders // Compute primary cascade and an optional neighbor for cross-fade near borders
struct CascadeMix { uint i0; uint i1; float w1; }; struct CascadeMix { uint i0; uint i1; float w1; };
@@ -154,11 +163,13 @@ float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L)
// Slope-based tiny baseline bias (cheap safety net) // Slope-based tiny baseline bias (cheap safety net)
float NoL = max(dot(N, L), 0.0); float NoL = max(dot(N, L), 0.0);
float slopeBias = max(0.0006 * (1.0 - NoL), SHADOW_MIN_BIAS); float slopeBias = max(0.0006 * (1.0 - NoL), SHADOW_MIN_BIAS);
float currentBias = current + slopeBias;
// Receiver-plane depth gradient in shadow UV space // Receiver-plane depth gradient in shadow UV space
vec3 dndc_dx = dFdx(ndc); vec3 dndc_dx = dFdx(ndc);
vec3 dndc_dy = dFdy(ndc); vec3 dndc_dy = dFdy(ndc);
vec2 dz_duv = receiverPlaneDepthGradient(ndc, dndc_dx, dndc_dy); vec2 dz_duv = receiverPlaneDepthGradient(ndc, dndc_dx, dndc_dy);
vec2 abs_dz_duv = abs(dz_duv) * SHADOW_RPDB_SCALE;
ivec2 dim = textureSize(shadowTex[ci], 0); ivec2 dim = textureSize(shadowTex[ci], 0);
vec2 texelSize = 1.0 / vec2(dim); vec2 texelSize = 1.0 / vec2(dim);
@@ -179,16 +190,15 @@ float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L)
vec2 pu = rot * POISSON_16[i]; vec2 pu = rot * POISSON_16[i];
vec2 off = pu * radius * texelSize;// uv-space offset of this tap vec2 off = pu * radius * texelSize;// uv-space offset of this tap
float pr = length(pu); float w = POISSON_16_WEIGHT[i];
float w = 1.0 - smoothstep(0.0, 0.65, pr);
float mapD = texture(shadowTex[ci], suv + off).r; float mapD = texture(shadowTex[ci], suv + off).r;
// Receiver-plane depth bias: conservative depth delta over this tap's offset // Receiver-plane depth bias: conservative depth delta over this tap's offset
// Approximate |Δz| ≈ |dz/du|*|Δu| + |dz/dv|*|Δv| // Approximate |Δz| ≈ |dz/du|*|Δu| + |dz/dv|*|Δv|
float rpdb = dot(abs(dz_duv), abs(off)) * SHADOW_RPDB_SCALE; float rpdb = dot(abs_dz_duv, abs(off));
float vis = step(mapD, current + slopeBias + rpdb); float vis = step(mapD, currentBias + rpdb);
visible += vis * w; visible += vis * w;
wsum += w; wsum += w;
@@ -366,7 +376,7 @@ void main(){
if (maxT > 0.01) if (maxT > 0.01)
{ {
vec3 L = toL / maxT; vec3 L = toL / maxT;
vec3 dir = normalize(sceneData.spotLights[i].direction_cos_outer.xyz); vec3 dir = sceneData.spotLights[i].direction_cos_outer.xyz;
float cosTheta = dot(-L, dir); float cosTheta = dot(-L, dir);
if (cosTheta > sceneData.spotLights[i].direction_cos_outer.w) if (cosTheta > sceneData.spotLights[i].direction_cos_outer.w)
{ {
@@ -399,11 +409,12 @@ void main(){
// Image-Based Lighting: split-sum approximation // Image-Based Lighting: split-sum approximation
vec3 R = reflect(-V, N); vec3 R = reflect(-V, N);
float NdotV = max(dot(N, V), 0.0);
float levels = float(textureQueryLevels(iblSpec2D)); float levels = float(textureQueryLevels(iblSpec2D));
float lod = ibl_lod_from_roughness(roughness, levels); float lod = ibl_lod_from_roughness(roughness, levels);
vec2 uv = dir_to_equirect(R); vec2 uv = dir_to_equirect_normalized(R);
vec3 prefiltered = textureLod(iblSpec2D, uv, lod).rgb; vec3 prefiltered = textureLod(iblSpec2D, uv, lod).rgb;
vec2 brdf = texture(iblBRDF, vec2(max(dot(N, V), 0.0), roughness)).rg; vec2 brdf = texture(iblBRDF, vec2(NdotV, roughness)).rg;
vec3 F0 = mix(vec3(0.04), albedo, metallic); vec3 F0 = mix(vec3(0.04), albedo, metallic);
vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y); vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y);
vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N); vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N);

View File

@@ -55,6 +55,15 @@ vec2(0.0281, -0.2468), vec2(-0.2104, 0.0573),
vec2(0.1197, 0.0779), vec2(-0.0905, -0.1203) vec2(0.1197, 0.0779), vec2(-0.0905, -0.1203)
); );
// Per-tap filter weights for the POISSON_16 kernel, baked as constants:
//   POISSON_16_WEIGHT[i] = 1.0 - smoothstep(0.0, 0.65, length(POISSON_16[i]))
// The shader rotates each tap before use; a rotation does not change a tap's
// length, so the weight depends only on the tap index.
// NOTE(review): this relies on the per-pixel `rot` matrix being a pure rotation - confirm.
const float POISSON_16_WEIGHT[16] = float[16](
    // taps 0..7
    0.46137072, 0.56308092, 0.37907144, 0.34930667, 0.17150249, 0.22669642, 0.16976301, 0.19912809,
    // taps 8..15
    0.20140948, 0.24589236, 0.18334537, 0.14418702, 0.67350789, 0.73787198, 0.87638682, 0.86392944
);
// Compute primary cascade and an optional neighbor for cross-fade near borders // Compute primary cascade and an optional neighbor for cross-fade near borders
struct CascadeMix { uint i0; uint i1; float w1; }; struct CascadeMix { uint i0; uint i1; float w1; };
@@ -146,11 +155,13 @@ float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L)
// Slope-based tiny baseline bias (cheap safety net) // Slope-based tiny baseline bias (cheap safety net)
float NoL = max(dot(N, L), 0.0); float NoL = max(dot(N, L), 0.0);
float slopeBias = max(0.0006 * (1.0 - NoL), SHADOW_MIN_BIAS); float slopeBias = max(0.0006 * (1.0 - NoL), SHADOW_MIN_BIAS);
float currentBias = current + slopeBias;
// Receiver-plane depth gradient in shadow UV space // Receiver-plane depth gradient in shadow UV space
vec3 dndc_dx = dFdx(ndc); vec3 dndc_dx = dFdx(ndc);
vec3 dndc_dy = dFdy(ndc); vec3 dndc_dy = dFdy(ndc);
vec2 dz_duv = receiverPlaneDepthGradient(ndc, dndc_dx, dndc_dy); vec2 dz_duv = receiverPlaneDepthGradient(ndc, dndc_dx, dndc_dy);
vec2 abs_dz_duv = abs(dz_duv) * SHADOW_RPDB_SCALE;
ivec2 dim = textureSize(shadowTex[ci], 0); ivec2 dim = textureSize(shadowTex[ci], 0);
vec2 texelSize = 1.0 / vec2(dim); vec2 texelSize = 1.0 / vec2(dim);
@@ -171,16 +182,15 @@ float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L)
vec2 pu = rot * POISSON_16[i]; vec2 pu = rot * POISSON_16[i];
vec2 off = pu * radius * texelSize; // uv-space offset of this tap vec2 off = pu * radius * texelSize; // uv-space offset of this tap
float pr = length(pu); float w = POISSON_16_WEIGHT[i];
float w = 1.0 - smoothstep(0.0, 0.65, pr);
float mapD = texture(shadowTex[ci], suv + off).r; float mapD = texture(shadowTex[ci], suv + off).r;
// Receiver-plane depth bias: conservative depth delta over this tap's offset // Receiver-plane depth bias: conservative depth delta over this tap's offset
// Approximate |Δz| ≈ |dz/du|*|Δu| + |dz/dv|*|Δv| // Approximate |Δz| ≈ |dz/du|*|Δu| + |dz/dv|*|Δv|
float rpdb = dot(abs(dz_duv), abs(off)) * SHADOW_RPDB_SCALE; float rpdb = dot(abs_dz_duv, abs(off));
float vis = step(mapD, current + slopeBias + rpdb); float vis = step(mapD, currentBias + rpdb);
visible += vis * w; visible += vis * w;
wsum += w; wsum += w;
@@ -255,11 +265,12 @@ void main(){
// Image-Based Lighting: split-sum approximation // Image-Based Lighting: split-sum approximation
vec3 R = reflect(-V, N); vec3 R = reflect(-V, N);
float NdotV = max(dot(N, V), 0.0);
float levels = float(textureQueryLevels(iblSpec2D)); float levels = float(textureQueryLevels(iblSpec2D));
float lod = ibl_lod_from_roughness(roughness, levels); float lod = ibl_lod_from_roughness(roughness, levels);
vec2 uv = dir_to_equirect(R); vec2 uv = dir_to_equirect_normalized(R);
vec3 prefiltered = textureLod(iblSpec2D, uv, lod).rgb; vec3 prefiltered = textureLod(iblSpec2D, uv, lod).rgb;
vec2 brdf = texture(iblBRDF, vec2(max(dot(N, V), 0.0), roughness)).rg; vec2 brdf = texture(iblBRDF, vec2(NdotV, roughness)).rg;
vec3 F0 = mix(vec3(0.04), albedo, metallic); vec3 F0 = mix(vec3(0.04), albedo, metallic);
vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y); vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y);
vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N); vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N);

View File

@@ -54,16 +54,22 @@ void main() {
// Normal mapping: decode tangent-space normal and transform to world space // Normal mapping: decode tangent-space normal and transform to world space
// Expect UNORM normal map; support BC5 (RG) by reconstructing Z from XY. // Expect UNORM normal map; support BC5 (RG) by reconstructing Z from XY.
vec2 enc = texture(normalMap, inUV).xy * 2.0 - 1.0; vec3 N = normalize(inNormal);
vec3 Nw = N;
float normalScale = max(materialData.extra[0].x, 0.0); float normalScale = max(materialData.extra[0].x, 0.0);
if (normalScale > 0.0)
{
vec2 enc = texture(normalMap, inUV).xy * 2.0 - 1.0;
enc *= normalScale; enc *= normalScale;
float z2 = 1.0 - dot(enc, enc); float z2 = 1.0 - dot(enc, enc);
float nz = z2 > 0.0 ? sqrt(z2) : 0.0; float nz = z2 > 0.0 ? sqrt(z2) : 0.0;
vec3 Nm = vec3(enc, nz); vec3 Nm = vec3(enc, nz);
vec3 N = normalize(inNormal);
vec3 T = normalize(inTangent.xyz); vec3 T = normalize(inTangent.xyz);
vec3 B = normalize(cross(N, T)) * inTangent.w; vec3 B = normalize(cross(N, T)) * inTangent.w;
vec3 Nw = normalize(T * Nm.x + B * Nm.y + N * Nm.z); Nw = normalize(T * Nm.x + B * Nm.y + N * Nm.z);
}
outPos = vec4(inWorldPos, 1.0); outPos = vec4(inWorldPos, 1.0);
outNorm = vec4(Nw, roughness); outNorm = vec4(Nw, roughness);
@@ -72,15 +78,20 @@ void main() {
// extra[0].y = AO strength, extra[0].z = hasAO flag (1 = use AO texture) // extra[0].y = AO strength, extra[0].z = hasAO flag (1 = use AO texture)
float hasAO = materialData.extra[0].z; float hasAO = materialData.extra[0].z;
float aoStrength = clamp(materialData.extra[0].y, 0.0, 1.0); float aoStrength = clamp(materialData.extra[0].y, 0.0, 1.0);
float aoTex = texture(occlusionTex, inUV).r;
float ao = 1.0; float ao = 1.0;
if (hasAO > 0.5) if (hasAO > 0.5 && aoStrength > 0.0)
{ {
float aoTex = texture(occlusionTex, inUV).r;
ao = 1.0 - aoStrength + aoStrength * aoTex; ao = 1.0 - aoStrength + aoStrength * aoTex;
} }
vec3 emissive = vec3(0.0);
vec3 emissiveFactor = materialData.extra[1].rgb; vec3 emissiveFactor = materialData.extra[1].rgb;
vec3 emissiveTex = texture(emissiveTex, inUV).rgb; if (any(greaterThan(emissiveFactor, vec3(0.0))))
vec3 emissive = emissiveTex * emissiveFactor; {
vec3 emissiveSample = texture(emissiveTex, inUV).rgb;
emissive = emissiveSample * emissiveFactor;
}
outExtra = vec4(ao, emissive); outExtra = vec4(ao, emissive);
outObjectID = PushConstants.objectID; outObjectID = PushConstants.objectID;
} }

View File

@@ -17,34 +17,38 @@ vec3 sh_eval_irradiance(vec3 n)
const float c2 = 1.0925484306; const float c2 = 1.0925484306;
const float c3 = 0.3153915653; const float c3 = 0.3153915653;
const float c4 = 0.5462742153; const float c4 = 0.5462742153;
float Y[9];
Y[0] = c0; float x2 = x * x;
Y[1] = c1 * y; float y2 = y * y;
Y[2] = c1 * z; float z2 = z * z;
Y[3] = c1 * x;
Y[4] = c2 * x * y;
Y[5] = c2 * y * z;
Y[6] = c3 * (3.0 * z * z - 1.0);
Y[7] = c2 * x * z;
Y[8] = c4 * (x * x - y * y);
vec3 r = vec3(0.0); vec3 r = vec3(0.0);
for (int i = 0; i < 9; ++i) r += iblSH.sh[0].rgb * c0;
{ r += iblSH.sh[1].rgb * (c1 * y);
r += iblSH.sh[i].rgb * Y[i]; r += iblSH.sh[2].rgb * (c1 * z);
} r += iblSH.sh[3].rgb * (c1 * x);
r += iblSH.sh[4].rgb * (c2 * x * y);
r += iblSH.sh[5].rgb * (c2 * y * z);
r += iblSH.sh[6].rgb * (c3 * (3.0 * z2 - 1.0));
r += iblSH.sh[7].rgb * (c2 * x * z);
r += iblSH.sh[8].rgb * (c4 * (x2 - y2));
return r; return r;
} }
// Map direction to equirectangular UV (same convention across shaders). // Map direction to equirectangular UV (same convention across shaders).
vec2 dir_to_equirect(vec3 d) vec2 dir_to_equirect_normalized(vec3 d)
{ {
d = normalize(d);
float phi = atan(d.z, d.x); float phi = atan(d.z, d.x);
float theta = acos(clamp(d.y, -1.0, 1.0)); float theta = acos(clamp(d.y, -1.0, 1.0));
// 1/(2*pi) = 0.15915494309, 1/pi = 0.31830988618 // 1/(2*pi) = 0.15915494309, 1/pi = 0.31830988618
return vec2(phi * 0.15915494309 + 0.5, theta * 0.31830988618); return vec2(phi * 0.15915494309 + 0.5, theta * 0.31830988618);
} }
// Convenience overload for directions of arbitrary length: normalizes `d`
// and forwards the result to dir_to_equirect_normalized.
vec2 dir_to_equirect(vec3 d)
{
    vec3 unitDir = normalize(d);
    return dir_to_equirect_normalized(unitDir);
}
// Helper for selecting mip LOD from roughness and available levels. // Helper for selecting mip LOD from roughness and available levels.
// Uses roughness^2 to bias towards blurrier reflections at mid roughness. // Uses roughness^2 to bias towards blurrier reflections at mid roughness.
float ibl_lod_from_roughness(float roughness, float levels) float ibl_lod_from_roughness(float roughness, float levels)
@@ -55,4 +59,3 @@ float ibl_lod_from_roughness(float roughness, float levels)
} }
#endif // IBL_COMMON_GLSL #endif // IBL_COMMON_GLSL

View File

@@ -3,9 +3,16 @@
const float PI = 3.14159265359; const float PI = 3.14159265359;
// Raises x to the fifth power with three multiplications
// (x^2, then x^4, then x^5) instead of a pow() call.
float pow5(float x)
{
    float squared = x * x;
    float fourth = squared * squared;
    return fourth * x;
}
vec3 fresnelSchlick(float cosTheta, vec3 F0) vec3 fresnelSchlick(float cosTheta, vec3 F0)
{ {
return F0 + (1.0 - F0) * pow(1.0 - cosTheta, 5.0); float m = clamp(1.0 - cosTheta, 0.0, 1.0);
return F0 + (1.0 - F0) * pow5(m);
} }
float DistributionGGX(vec3 N, vec3 H, float roughness) float DistributionGGX(vec3 N, vec3 H, float roughness)
@@ -42,19 +49,21 @@ vec3 evaluate_brdf(vec3 N, vec3 V, vec3 L, vec3 albedo, float roughness, float m
{ {
vec3 H = normalize(V + L); vec3 H = normalize(V + L);
float NdotV = max(dot(N, V), 0.0);
float NdotL = max(dot(N, L), 0.0);
vec3 F0 = mix(vec3(0.04), albedo, metallic); vec3 F0 = mix(vec3(0.04), albedo, metallic);
vec3 F = fresnelSchlick(max(dot(H, V), 0.0), F0); vec3 F = fresnelSchlick(max(dot(H, V), 0.0), F0);
float NDF = DistributionGGX(N, H, roughness); float NDF = DistributionGGX(N, H, roughness);
float G = GeometrySmith(N, V, L, roughness); float G = GeometrySmith(N, V, L, roughness);
vec3 numerator = NDF * G * F; vec3 numerator = NDF * G * F;
float denom = 4.0 * max(dot(N, V), 0.0) * max(dot(N, L), 0.0); float denom = 4.0 * NdotV * NdotL;
vec3 specular = numerator / max(denom, 0.001); vec3 specular = numerator / max(denom, 0.001);
vec3 kS = F; vec3 kS = F;
vec3 kD = (1.0 - kS) * (1.0 - metallic); vec3 kD = (1.0 - kS) * (1.0 - metallic);
float NdotL = max(dot(N, L), 0.0);
return (kD * albedo / PI + specular) * NdotL; return (kD * albedo / PI + specular) * NdotL;
} }
@@ -62,16 +71,19 @@ vec3 eval_point_light(GPUPunctualLight light, vec3 pos, vec3 N, vec3 V, vec3 alb
{ {
vec3 lightPos = light.position_radius.xyz; vec3 lightPos = light.position_radius.xyz;
float radius = max(light.position_radius.w, 0.0001); float radius = max(light.position_radius.w, 0.0001);
vec3 L = lightPos - pos;
float dist = length(L); vec3 toLight = lightPos - pos;
if (dist <= 0.0001) float dist2 = dot(toLight, toLight);
if (dist2 <= 1.0e-8)
{ {
return vec3(0.0); return vec3(0.0);
} }
L /= dist; float invDist = inversesqrt(dist2);
float dist = dist2 * invDist;
vec3 L = toLight * invDist;
// Smooth falloff: inverse-square with soft clamp at radius // Smooth falloff: inverse-square with soft clamp at radius
float att = 1.0 / max(dist * dist, 0.0001); float att = 1.0 / max(dist2, 1.0e-8);
float x = clamp(dist / radius, 0.0, 1.0); float x = clamp(dist / radius, 0.0, 1.0);
float smth = (1.0 - x * x); float smth = (1.0 - x * x);
smth *= smth; smth *= smth;
@@ -88,14 +100,17 @@ vec3 eval_spot_light(GPUSpotLight light, vec3 pos, vec3 N, vec3 V, vec3 albedo,
float radius = max(light.position_radius.w, 0.0001); float radius = max(light.position_radius.w, 0.0001);
vec3 toLight = lightPos - pos; vec3 toLight = lightPos - pos;
float dist = length(toLight); float dist2 = dot(toLight, toLight);
if (dist <= 0.0001) if (dist2 <= 1.0e-8)
{ {
return vec3(0.0); return vec3(0.0);
} }
vec3 L = toLight / dist; // surface -> light float invDist = inversesqrt(dist2);
float dist = dist2 * invDist;
vec3 L = toLight * invDist; // surface -> light
vec3 dir = normalize(light.direction_cos_outer.xyz); // light -> forward // direction_cos_outer.xyz is expected to be unit length (normalized on the CPU).
vec3 dir = light.direction_cos_outer.xyz; // light -> forward
float cosOuter = light.direction_cos_outer.w; float cosOuter = light.direction_cos_outer.w;
float cosInner = light.cone.x; float cosInner = light.cone.x;
float cosTheta = dot(-L, dir); // light -> surface vs light forward float cosTheta = dot(-L, dir); // light -> surface vs light forward
@@ -108,7 +123,7 @@ vec3 eval_spot_light(GPUSpotLight light, vec3 pos, vec3 N, vec3 V, vec3 albedo,
spot *= spot; spot *= spot;
// Smooth falloff: inverse-square with soft clamp at radius // Smooth falloff: inverse-square with soft clamp at radius
float att = 1.0 / max(dist * dist, 0.0001); float att = 1.0 / max(dist2, 1.0e-8);
float x = clamp(dist / radius, 0.0, 1.0); float x = clamp(dist / radius, 0.0, 1.0);
float smth = (1.0 - x * x); float smth = (1.0 - x * x);
smth *= smth; smth *= smth;

View File

@@ -44,16 +44,22 @@ void main()
// Normal mapping path for forward/transparent pipeline // Normal mapping path for forward/transparent pipeline
// Expect UNORM normal map; support BC5 (RG) by reconstructing Z from XY. // Expect UNORM normal map; support BC5 (RG) by reconstructing Z from XY.
vec2 enc = texture(normalMap, inUV).xy * 2.0 - 1.0; vec3 Nn = normalize(inNormal);
vec3 N = Nn;
float normalScale = max(materialData.extra[0].x, 0.0); float normalScale = max(materialData.extra[0].x, 0.0);
if (normalScale > 0.0)
{
vec2 enc = texture(normalMap, inUV).xy * 2.0 - 1.0;
enc *= normalScale; enc *= normalScale;
float z2 = 1.0 - dot(enc, enc); float z2 = 1.0 - dot(enc, enc);
float nz = z2 > 0.0 ? sqrt(z2) : 0.0; float nz = z2 > 0.0 ? sqrt(z2) : 0.0;
vec3 Nm = vec3(enc, nz); vec3 Nm = vec3(enc, nz);
vec3 Nn = normalize(inNormal);
vec3 T = normalize(inTangent.xyz); vec3 T = normalize(inTangent.xyz);
vec3 B = normalize(cross(Nn, T)) * inTangent.w; vec3 B = normalize(cross(Nn, T)) * inTangent.w;
vec3 N = normalize(T * Nm.x + B * Nm.y + Nn * Nm.z); N = normalize(T * Nm.x + B * Nm.y + Nn * Nm.z);
}
vec3 camPos = getCameraWorldPosition(); vec3 camPos = getCameraWorldPosition();
vec3 V = normalize(camPos - inWorldPos); vec3 V = normalize(camPos - inWorldPos);
@@ -78,11 +84,12 @@ void main()
// IBL: specular from equirect 2D mips; diffuse from SH // IBL: specular from equirect 2D mips; diffuse from SH
vec3 R = reflect(-V, N); vec3 R = reflect(-V, N);
float NdotV = max(dot(N, V), 0.0);
float levels = float(textureQueryLevels(iblSpec2D)); float levels = float(textureQueryLevels(iblSpec2D));
float lod = ibl_lod_from_roughness(roughness, levels); float lod = ibl_lod_from_roughness(roughness, levels);
vec2 uv = dir_to_equirect(R); vec2 uv = dir_to_equirect_normalized(R);
vec3 prefiltered = textureLod(iblSpec2D, uv, lod).rgb; vec3 prefiltered = textureLod(iblSpec2D, uv, lod).rgb;
vec2 brdf = texture(iblBRDF, vec2(max(dot(N, V), 0.0), roughness)).rg; vec2 brdf = texture(iblBRDF, vec2(NdotV, roughness)).rg;
vec3 F0 = mix(vec3(0.04), albedo, metallic); vec3 F0 = mix(vec3(0.04), albedo, metallic);
vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y); vec3 specIBL = prefiltered * (F0 * brdf.x + brdf.y);
vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N); vec3 diffIBL = (1.0 - metallic) * albedo * sh_eval_irradiance(N);
@@ -91,17 +98,21 @@ void main()
// extra[0].y = AO strength, extra[0].z = hasAO flag (1 = use AO texture) // extra[0].y = AO strength, extra[0].z = hasAO flag (1 = use AO texture)
float hasAO = materialData.extra[0].z; float hasAO = materialData.extra[0].z;
float aoStrength = clamp(materialData.extra[0].y, 0.0, 1.0); float aoStrength = clamp(materialData.extra[0].y, 0.0, 1.0);
float aoTex = texture(occlusionTex, inUV).r;
float ao = 1.0; float ao = 1.0;
if (hasAO > 0.5) if (hasAO > 0.5 && aoStrength > 0.0)
{ {
float aoTex = texture(occlusionTex, inUV).r;
ao = 1.0 - aoStrength + aoStrength * aoTex; ao = 1.0 - aoStrength + aoStrength * aoTex;
} }
// Emissive from texture and factor // Emissive from texture and factor
vec3 emissive = vec3(0.0);
vec3 emissiveFactor = materialData.extra[1].rgb; vec3 emissiveFactor = materialData.extra[1].rgb;
vec3 emissiveTex = texture(emissiveTex, inUV).rgb; if (any(greaterThan(emissiveFactor, vec3(0.0))))
vec3 emissive = emissiveTex * emissiveFactor; {
vec3 emissiveSample = texture(emissiveTex, inUV).rgb;
emissive = emissiveSample * emissiveFactor;
}
vec3 indirect = diffIBL + specIBL; vec3 indirect = diffIBL + specIBL;
vec3 color = direct + indirect * ao + emissive; vec3 color = direct + indirect * ao + emissive;

View File

@@ -17,7 +17,12 @@ float NoisyStarField( in vec2 vSamplePos, float fThreshhold )
{ {
float StarVal = Noise2d( vSamplePos ); float StarVal = Noise2d( vSamplePos );
if ( StarVal >= fThreshhold ) if ( StarVal >= fThreshhold )
StarVal = pow( (StarVal - fThreshhold)/(1.0 - fThreshhold), 6.0 ); {
float t = (StarVal - fThreshhold) / (1.0 - fThreshhold);
float t2 = t * t;
float t4 = t2 * t2;
StarVal = t4 * t2;
}
else else
StarVal = 0.0; StarVal = 0.0;
return StarVal; return StarVal;
@@ -57,7 +62,7 @@ void mainImage( out vec4 fragColor, in vec2 fragCoord )
// Stars with a slow crawl. // Stars with a slow crawl.
float xRate = 0.2; float xRate = 0.2;
float yRate = -0.06; float yRate = -0.06;
vec2 vSamplePos = fragCoord.xy + vec2( xRate * float( 1 ), yRate * float( 1 ) ); vec2 vSamplePos = fragCoord.xy + vec2( xRate, yRate );
float StarVal = StableStarField( vSamplePos, StarFieldThreshhold ); float StarVal = StableStarField( vSamplePos, StarFieldThreshhold );
vColor += vec3( StarVal ); vColor += vec3( StarVal );
@@ -80,4 +85,3 @@ void main()
imageStore(image, texelCoord, color); imageStore(image, texelCoord, color);
} }
} }

View File

@@ -20,6 +20,12 @@ vec3 getCameraWorldPosition()
return -rot * T; // C = -R * T return -rot * T; // C = -R * T
} }
// Raises x to the fifth power with three multiplications
// (x^2, then x^4, then x^5) instead of a pow() call.
float pow5(float x)
{
    float squared = x * x;
    float fourth = squared * squared;
    return fourth * x;
}
vec3 projectToScreenFromView(vec3 viewPos) vec3 projectToScreenFromView(vec3 viewPos)
{ {
vec4 clip = sceneData.proj * vec4(viewPos, 1.0); vec4 clip = sceneData.proj * vec4(viewPos, 1.0);
@@ -65,7 +71,7 @@ void main()
vec3 V = normalize(camPos - worldPos); vec3 V = normalize(camPos - worldPos);
vec3 R = reflect(-V, N); vec3 R = reflect(-V, N);
vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz; vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz;
vec3 viewDir = normalize((sceneData.view * vec4(R, 0.0)).xyz); vec3 viewDir = (sceneData.view * vec4(R, 0.0)).xyz;
float gloss = 1.0 - roughness; float gloss = 1.0 - roughness;
float F0 = mix(0.04, 1.0, metallic); float F0 = mix(0.04, 1.0, metallic);
@@ -125,7 +131,7 @@ void main()
vec3 reflColor = texture(hdrColor, hitUV).rgb; vec3 reflColor = texture(hdrColor, hitUV).rgb;
float NoV = clamp(dot(N, V), 0.0, 1.0); float NoV = clamp(dot(N, V), 0.0, 1.0);
float F = F0 + (1.0 - F0) * pow(1.0 - NoV, 5.0); // Schlick float F = F0 + (1.0 - F0) * pow5(1.0 - NoV); // Schlick
float ssrVisibility = gloss; float ssrVisibility = gloss;
float weight = clamp(F * ssrVisibility, 0.0, 1.0); float weight = clamp(F * ssrVisibility, 0.0, 1.0);

View File

@@ -25,6 +25,12 @@ vec3 getCameraWorldPosition()
return -rot * T; // C = -R * T return -rot * T; // C = -R * T
} }
// Raises x to the fifth power with three multiplications
// (x^2, then x^4, then x^5) instead of a pow() call.
float pow5(float x)
{
    float squared = x * x;
    float fourth = squared * squared;
    return fourth * x;
}
vec3 projectToScreenFromView(vec3 viewPos) vec3 projectToScreenFromView(vec3 viewPos)
{ {
vec4 clip = sceneData.proj * vec4(viewPos, 1.0); vec4 clip = sceneData.proj * vec4(viewPos, 1.0);
@@ -72,7 +78,7 @@ void main()
vec3 V = normalize(camPos - worldPos); vec3 V = normalize(camPos - worldPos);
vec3 R = reflect(-V, N); vec3 R = reflect(-V, N);
vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz; vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz;
vec3 viewDir = normalize((sceneData.view * vec4(R, 0.0)).xyz); vec3 viewDir = (sceneData.view * vec4(R, 0.0)).xyz;
float gloss = 1.0 - roughness; float gloss = 1.0 - roughness;
float F0 = mix(0.04, 1.0, metallic); float F0 = mix(0.04, 1.0, metallic);
@@ -146,7 +152,7 @@ void main()
vec3 reflColor = texture(hdrColor, ssrUV).rgb; vec3 reflColor = texture(hdrColor, ssrUV).rgb;
float NoV = clamp(dot(N, V), 0.0, 1.0); float NoV = clamp(dot(N, V), 0.0, 1.0);
float F = F0 + (1.0 - F0) * pow(1.0 - NoV, 5.0); // Schlick float F = F0 + (1.0 - F0) * pow5(1.0 - NoV); // Schlick
float ssrVisibility = gloss; float ssrVisibility = gloss;
float weight = clamp(F * ssrVisibility, 0.0, 1.0); float weight = clamp(F * ssrVisibility, 0.0, 1.0);
@@ -198,7 +204,7 @@ void main()
vec3 reflColor = texture(hdrColor, hitUV).rgb; vec3 reflColor = texture(hdrColor, hitUV).rgb;
float NoV = clamp(dot(N, V), 0.0, 1.0); float NoV = clamp(dot(N, V), 0.0, 1.0);
float F = F0 + (1.0 - F0) * pow(1.0 - NoV, 5.0); // Schlick float F = F0 + (1.0 - F0) * pow5(1.0 - NoV); // Schlick
float rtVisibility = gloss; float rtVisibility = gloss;
float weight = clamp(F * rtVisibility, 0.0, 1.0); float weight = clamp(F * rtVisibility, 0.0, 1.0);