FIX: Memory error fix, debug scheme
This commit is contained in:
273
shaders/deferred_lighting_nort.frag
Normal file
273
shaders/deferred_lighting_nort.frag
Normal file
@@ -0,0 +1,273 @@
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#include "input_structures.glsl"
|
||||
|
||||
layout(location=0) in vec2 inUV;
|
||||
layout(location=0) out vec4 outColor;
|
||||
|
||||
layout(set=1, binding=0) uniform sampler2D posTex;
|
||||
layout(set=1, binding=1) uniform sampler2D normalTex;
|
||||
layout(set=1, binding=2) uniform sampler2D albedoTex;
|
||||
layout(set=2, binding=0) uniform sampler2D shadowTex[4];
|
||||
|
||||
// Tunables for shadow quality and blending
|
||||
// Border smoothing width in light-space NDC (0..1). Larger = wider cross-fade.
|
||||
const float SHADOW_BORDER_SMOOTH_NDC = 0.08;
|
||||
// Base PCF radius in texels for cascade 0; higher cascades scale this up slightly.
|
||||
const float SHADOW_PCF_BASE_RADIUS = 1.35;
|
||||
// Extra PCF radius (in texels) added on top of the base radius at the last cascade; cascades in between get a linearly interpolated share.
|
||||
const float SHADOW_PCF_CASCADE_GAIN = 2.0; // extra radius at far end
|
||||
// Receiver normal-based offset to reduce acne (in world units)
|
||||
const float SHADOW_NORMAL_OFFSET = 0.0025;
|
||||
// Scale for receiver-plane depth bias term (tweak if over/under biased)
|
||||
const float SHADOW_RPDB_SCALE = 1.0;
|
||||
// Minimum clamp to keep a tiny bias even on perpendicular receivers
|
||||
const float SHADOW_MIN_BIAS = 1e-5;
|
||||
|
||||
const float PI = 3.14159265359;
|
||||
|
||||
// Arithmetic 2D -> 1D hash: folds a vec2 into a pseudo-random float in [0, 1).
// Uses only fract/dot, so it needs no textures or trigonometry.
float hash12(vec2 p)
{
    vec3 seed = fract(p.xyx * 0.1031);
    float mixTerm = dot(seed, seed.yzx + 33.33);
    seed += mixTerm;
    return fract((seed.x + seed.y) * seed.z);
}
|
||||
|
||||
// 16 fixed 2D tap offsets used by the PCF loop in sampleCascadeShadow.
// Every offset has length < 0.5; the taps are rotated per fragment before use.
const vec2 POISSON_16[16] = vec2[](
    vec2( 0.2852, -0.1883),
    vec2(-0.1464,  0.2591),
    vec2(-0.3651, -0.0974),
    vec2( 0.0901,  0.3807),
    vec2( 0.4740,  0.0679),
    vec2(-0.0512, -0.4466),
    vec2(-0.4497,  0.1673),
    vec2( 0.3347,  0.3211),
    vec2( 0.1948, -0.4196),
    vec2(-0.2919, -0.3291),
    vec2(-0.0763,  0.4661),
    vec2( 0.4421, -0.2217),
    vec2( 0.0281, -0.2468),
    vec2(-0.2104,  0.0573),
    vec2( 0.1197,  0.0779),
    vec2(-0.0905, -0.1203)
);
|
||||
|
||||
// Compute primary cascade and an optional neighbor for cross-fade near borders
|
||||
// Cascade selection result:
//   i0 - primary cascade index
//   i1 - cascade to blend towards (equals i0 when no blending is needed)
//   w1 - blend weight towards i1 (0 means "use i0 only")
struct CascadeMix
{
    uint i0;
    uint i1;
    float w1;
};

// Selects the first (lowest-index) cascade whose light-space clip volume
// contains worldPos, falling back to cascade 3 when none does.
// If the point is within SHADOW_BORDER_SMOOTH_NDC of the selected cascade's XY
// border and the next cascade also contains it, the next cascade is reported
// as i1 with a smoothstep-shaped weight w1 so the caller can cross-fade.
CascadeMix computeCascadeMix(vec3 worldPos)
{
    vec4 wp4 = vec4(worldPos, 1.0);

    uint chosen = 3u;
    vec3 chosenNdc = vec3(0);
    for (uint c = 0u; c < 4u; ++c)
    {
        vec4 clipPos = sceneData.lightViewProjCascades[c] * wp4;
        vec3 p = clipPos.xyz / max(clipPos.w, 1e-6);
        bool inside = (abs(p.x) <= 1.0 && abs(p.y) <= 1.0 && p.z >= 0.0 && p.z <= 1.0);
        if (!inside)
        {
            continue;
        }
        chosen = c;
        chosenNdc = p;
        break;
    }

    CascadeMix result = CascadeMix(chosen, chosen, 0.0);

    // The last cascade has no coarser neighbor to fade into.
    if (!(chosen < 3u))
    {
        return result;
    }

    // Distance to the XY border: 0 at the cascade center, 1 on the border.
    float borderDist = max(abs(chosenNdc.x), abs(chosenNdc.y));
    // Blending starts once we are within the smoothing width of the border.
    float t = clamp((borderDist - (1.0 - SHADOW_BORDER_SMOOTH_NDC)) / max(SHADOW_BORDER_SMOOTH_NDC, 1e-4), 0.0, 1.0);
    float fade = smoothstep(0.0, 1.0, t);
    if (!(fade > 0.0))
    {
        return result;
    }

    // Only blend when the neighbor cascade actually covers the point.
    uint next = chosen + 1u;
    vec4 clipNext = sceneData.lightViewProjCascades[next] * wp4;
    vec3 q = clipNext.xyz / max(clipNext.w, 1e-6);
    if (abs(q.x) <= 1.0 && abs(q.y) <= 1.0 && q.z >= 0.0 && q.z <= 1.0)
    {
        result.i1 = next;
        result.w1 = fade;
    }

    return result;
}
|
||||
|
||||
// Compute receiver-plane depth gradient dz/duv using derivatives of shadow NDC
|
||||
// Reference: Akenine-Möller et al., "Receiver Plane Depth Bias" (PCF-friendly)
|
||||
// Solves for the receiver-plane depth gradient (dz/du, dz/dv) in shadow-map UV
// space from screen-space derivatives of the light-space NDC position.
//   ndc      - light-space NDC position (not read by this function)
//   dndc_dx  - derivative of ndc along screen x
//   dndc_dy  - derivative of ndc along screen y
// Returns vec2(0.0) when the screen -> UV mapping is (near-)singular, so the
// caller falls back to its constant/slope bias only.
vec2 receiverPlaneDepthGradient(vec3 ndc, vec3 dndc_dx, vec3 dndc_dy)
{
    // uv = 0.5 * ndc.xy + 0.5, hence UV derivatives are half the NDC ones.
    vec2 uvDx = 0.5 * dndc_dx.xy;
    vec2 uvDy = 0.5 * dndc_dy.xy;

    // Chain rule: (dz/dx, dz/dy) = M * (dz/du, dz/dv), where the columns of M
    // are (du/dx, du/dy) and (dv/dx, dv/dy). We need M^-1.
    float det = uvDx.x * uvDy.y - uvDx.y * uvDy.x;
    if (abs(det) < 1e-8)
    {
        return vec2(0.0);
    }

    // Closed-form 2x2 inverse (adjugate scaled by 1/det), written out by hand.
    mat2 inverseM = (1.0 / det) * mat2(vec2( uvDy.y, -uvDy.x),
                                       vec2(-uvDx.y,  uvDx.x));

    // Depth derivatives with respect to screen pixels.
    vec2 depthDxDy = vec2(dndc_dx.z, dndc_dy.z);
    return inverseM * depthDxDy;
}
|
||||
|
||||
// Percentage-closer-filtered shadow visibility for a single cascade.
//   ci       - cascade index into shadowTex / sceneData.lightViewProjCascades
//   worldPos - receiver position in world space
//   N        - surface normal as supplied by the caller
//   L        - direction towards the light as supplied by the caller
// Returns a visibility factor in [0, 1]; 1.0 (fully lit) when the point
// projects outside the cascade's [0, 1] UV range.
float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L)
{
    mat4 lightMat = sceneData.lightViewProjCascades[ci];

    vec4 lclip = lightMat * vec4(worldPos, 1.0);
    // Same divide guard as computeCascadeMix so both functions agree on the
    // projected position even for a degenerate w.
    vec3 ndc = lclip.xyz / max(lclip.w, 1e-6);
    vec2 suv = ndc.xy * 0.5 + 0.5;

    // Take the screen-space derivatives BEFORE the early-out below: derivative
    // functions are undefined once neighboring fragments of a quad have
    // diverged, and the out-of-range return is exactly such a divergence.
    vec3 dndc_dx = dFdx(ndc);
    vec3 dndc_dy = dFdy(ndc);

    if (any(lessThan(suv, vec2(0.0))) || any(greaterThan(suv, vec2(1.0))))
        return 1.0;

    float current = clamp(ndc.z, 0.0, 1.0);

    // Slope-based baseline bias: grows as the surface turns away from the
    // light, never dropping below SHADOW_MIN_BIAS.
    float NoL = max(dot(N, L), 0.0);
    float slopeBias = max(0.0006 * (1.0 - NoL), SHADOW_MIN_BIAS);

    // Receiver-plane depth gradient in shadow UV space.
    vec2 dz_duv = receiverPlaneDepthGradient(ndc, dndc_dx, dndc_dy);

    // NOTE(review): ci can differ between neighboring fragments. Indexing a
    // sampler array with a non-dynamically-uniform index normally requires
    // nonuniformEXT (GL_EXT_nonuniform_qualifier) in Vulkan GLSL - confirm
    // whether that is needed/enabled for this pipeline.
    ivec2 dim = textureSize(shadowTex[ci], 0);
    vec2 texelSize = 1.0 / vec2(dim);

    // Filter radius in texels: base radius at cascade 0, base + gain at cascade 3.
    float baseRadius = SHADOW_PCF_BASE_RADIUS;
    float radius = mix(baseRadius, baseRadius + SHADOW_PCF_CASCADE_GAIN, float(ci) / 3.0);

    // Per-fragment rotation of the tap pattern, driven by a hash of the shadow UV.
    float ang = hash12(suv * 4096.0) * 6.2831853;
    vec2 r = vec2(cos(ang), sin(ang));
    mat2 rot = mat2(r.x, -r.y, r.y, r.x);

    const int TAP_COUNT = 16;
    float visible = 0.0;
    float wsum = 0.0;

    for (int i = 0; i < TAP_COUNT; ++i)
    {
        vec2 pu = rot * POISSON_16[i];
        vec2 off = pu * radius * texelSize; // uv-space offset of this tap

        // Taps closer to the center weigh more.
        float pr = length(pu);
        float w = 1.0 - smoothstep(0.0, 0.65, pr);

        // Explicit LOD 0: implicit-LOD texture() relies on derivatives, which
        // are undefined here (we are past a divergent return).
        float mapD = textureLod(shadowTex[ci], suv + off, 0.0).r;

        // Receiver-plane depth bias: conservative depth delta over this tap's
        // offset, |dz| ~= |dz/du| * |du| + |dz/dv| * |dv|.
        float rpdb = dot(abs(dz_duv), abs(off)) * SHADOW_RPDB_SCALE;

        // NOTE(review): a tap counts as lit when (receiver depth + bias) >= stored
        // depth - confirm this matches the depth convention of the shadow pass.
        float vis = step(mapD, current + slopeBias + rpdb);

        visible += vis * w;
        wsum += w;
    }

    float visibility = (wsum > 0.0) ? (visible / wsum) : 1.0;
    return visibility;
}
|
||||
|
||||
// Shadow visibility for a world-space point, cross-faded between two cascades
// when computeCascadeMix reports a non-zero blend weight.
// The receiver position is first pushed along N by SHADOW_NORMAL_OFFSET, scaled
// between 0.5x (N parallel to L) and 1.0x (N perpendicular to or facing away from L).
float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L)
{
    float facing = max(dot(N, L), 0.0);
    vec3 wp = worldPos + N * SHADOW_NORMAL_OFFSET * (0.5 + 0.5 * (1.0 - facing));

    CascadeMix cm = computeCascadeMix(wp);
    float primaryVis = sampleCascadeShadow(cm.i0, wp, N, L);

    // No neighbor to blend with: the primary cascade alone decides.
    if (cm.w1 <= 0.0)
    {
        return primaryVis;
    }

    float neighborVis = sampleCascadeShadow(cm.i1, wp, N, L);
    float blend = clamp(cm.w1, 0.0, 1.0);
    return mix(primaryVis, neighborVis, blend);
}
|
||||
|
||||
// Schlick's approximation of the Fresnel reflectance.
//   cosTheta - cosine of the angle between the half vector and the view vector
//   F0       - reflectance at normal incidence
// Returns F0 at cosTheta = 1 and approaches 1.0 as cosTheta goes to 0.
vec3 fresnelSchlick(float cosTheta, vec3 F0)
{
    // Clamp the base: rounding can push a dot product of normalized vectors
    // slightly above 1, and pow() with a negative base is undefined in GLSL.
    float grazing = clamp(1.0 - cosTheta, 0.0, 1.0);
    return F0 + (1.0 - F0) * pow(grazing, 5.0);
}
|
||||
|
||||
// GGX (Trowbridge-Reitz) normal distribution term.
//   N, H      - surface normal and half vector
//   roughness - perceptual roughness; squared once to get alpha, then again for alpha^2
// The denominator is floored at 0.001 to avoid division by ~0.
float DistributionGGX(vec3 N, vec3 H, float roughness)
{
    float alpha = roughness * roughness;
    float alphaSq = alpha * alpha;

    float cosNH = max(dot(N, H), 0.0);
    float cosNHSq = cosNH * cosNH;

    float base = (cosNHSq * (alphaSq - 1.0) + 1.0);
    float denominator = PI * base * base;

    return alphaSq / max(denominator, 0.001);
}
|
||||
|
||||
// Schlick-GGX geometry (masking) term for a single direction.
//   NdotV     - clamped cosine between the normal and the direction considered
//   roughness - perceptual roughness; remapped to k = (roughness + 1)^2 / 8
// The denominator is floored at 0.001 to avoid division by ~0.
float GeometrySchlickGGX(float NdotV, float roughness)
{
    float rPlusOne = (roughness + 1.0);
    float k = (rPlusOne * rPlusOne) / 8.0;

    return NdotV / max(NdotV * (1.0 - k) + k, 0.001);
}
|
||||
|
||||
// Smith geometry term: product of the Schlick-GGX terms for the view and the
// light direction, each evaluated with its cosine clamped to >= 0.
float GeometrySmith(vec3 N, vec3 V, vec3 L, float roughness)
{
    float cosNV = max(dot(N, V), 0.0);
    float cosNL = max(dot(N, L), 0.0);

    float viewTerm = GeometrySchlickGGX(cosNV, roughness);
    float lightTerm = GeometrySchlickGGX(cosNL, roughness);

    return lightTerm * viewTerm;
}
|
||||
|
||||
// Deferred lighting for one screen pixel: reads the G-buffer, evaluates a
// Cook-Torrance style sun light term with cascaded shadow-map visibility, and
// adds a flat ambient term.
void main()
{
    // Texels whose position sample has w == 0 are written as transparent black
    // and skipped (presumably "no geometry" - confirm against the G-buffer pass).
    vec4 gPos = texture(posTex, inUV);
    if (gPos.w == 0.0)
    {
        outColor = vec4(0.0);
        return;
    }

    vec4 gNormal = texture(normalTex, inUV);
    vec4 gAlbedo = texture(albedoTex, inUV);

    // Unpack material inputs: roughness rides in normal.w, metallic in albedo.a.
    vec3 pos = gPos.xyz;
    vec3 N = normalize(gNormal.xyz);
    float roughness = clamp(gNormal.w, 0.04, 1.0);
    vec3 albedo = gAlbedo.rgb;
    float metallic = clamp(gAlbedo.a, 0.0, 1.0);

    // Camera position is the translation column of the inverse view matrix.
    vec3 camPos = vec3(inverse(sceneData.view)[3]);
    vec3 V = normalize(camPos - pos);
    vec3 L = normalize(-sceneData.sunlightDirection.xyz);
    vec3 H = normalize(V + L);

    float NdotV = max(dot(N, V), 0.0);
    float NdotL = max(dot(N, L), 0.0);

    // Specular BRDF terms.
    vec3 F0 = mix(vec3(0.04), albedo, metallic);
    vec3 F = fresnelSchlick(max(dot(H, V), 0.0), F0);
    float NDF = DistributionGGX(N, H, roughness);
    float G = GeometrySmith(N, V, L, roughness);

    vec3 specular = (NDF * G * F) / max(4.0 * NdotV * NdotL, 0.001);

    // Energy not reflected specularly goes to diffuse; metals have no diffuse.
    vec3 kD = (1.0 - F) * (1.0 - metallic);

    // Sun shadowing from the cascaded shadow maps.
    float visibility = calcShadowVisibility(pos, N, L);

    // sunlightColor.a acts as an intensity multiplier on sunlightColor.rgb.
    vec3 irradiance = sceneData.sunlightColor.rgb * sceneData.sunlightColor.a * NdotL * visibility;

    vec3 color = (kD * albedo / PI + specular) * irradiance;
    color += albedo * sceneData.ambientColor.rgb;

    outColor = vec4(color, 1.0);
}
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include "vk_device.h"
|
||||
#include "core/vk_resource.h"
|
||||
#include "frame_resources.h"
|
||||
|
||||
ComputeBinding ComputeBinding::uniformBuffer(uint32_t binding, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset)
|
||||
{
|
||||
@@ -354,9 +355,22 @@ void ComputeManager::dispatchInstance(VkCommandBuffer cmd, const std::string &in
|
||||
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipeline());
|
||||
|
||||
updateDescriptorSet(it->second.descriptorSet, it->second.bindings);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getLayout(), 0, 1, &it->second.descriptorSet,
|
||||
0, nullptr);
|
||||
// Allocate a transient per-frame descriptor set to avoid updating a set
|
||||
// that might still be in use by a previous in-flight frame.
|
||||
VkDescriptorSet transientSet = context->currentFrame
|
||||
? context->currentFrame->_frameDescriptors.allocate(context->getDevice()->device(), pipeline.descriptorLayout)
|
||||
: VK_NULL_HANDLE;
|
||||
if (transientSet == VK_NULL_HANDLE)
|
||||
{
|
||||
// Fallback to instance-owned set if per-frame allocator unavailable
|
||||
updateDescriptorSet(it->second.descriptorSet, it->second.bindings);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getLayout(), 0, 1, &it->second.descriptorSet, 0, nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
updateDescriptorSet(transientSet, it->second.bindings);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getLayout(), 0, 1, &transientSet, 0, nullptr);
|
||||
}
|
||||
|
||||
if (dispatchInfo.pushConstants && dispatchInfo.pushConstantSize > 0)
|
||||
{
|
||||
@@ -459,9 +473,22 @@ bool ComputeManager::createPipeline(const std::string &name, const ComputePipeli
|
||||
DescriptorLayoutBuilder layoutBuilder;
|
||||
for (size_t i = 0; i < createInfo.descriptorTypes.size(); ++i)
|
||||
{
|
||||
layoutBuilder.add_binding(i, createInfo.descriptorTypes[i]);
|
||||
layoutBuilder.add_binding(static_cast<uint32_t>(i), createInfo.descriptorTypes[i]);
|
||||
}
|
||||
computePipeline.descriptorLayout = layoutBuilder.build(context->getDevice()->device(), VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
// Mark all compute bindings as UPDATE_AFTER_BIND so we can update
|
||||
// persistent instance descriptor sets while a previous frame is in-flight.
|
||||
std::vector<VkDescriptorBindingFlags> bindingFlags(createInfo.descriptorTypes.size(),
|
||||
VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT);
|
||||
VkDescriptorSetLayoutBindingFlagsCreateInfo flagsCI{ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO };
|
||||
flagsCI.bindingCount = static_cast<uint32_t>(bindingFlags.size());
|
||||
flagsCI.pBindingFlags = bindingFlags.data();
|
||||
|
||||
computePipeline.descriptorLayout = layoutBuilder.build(
|
||||
context->getDevice()->device(),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
&flagsCI,
|
||||
VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
|
||||
}
|
||||
|
||||
VkPipelineLayoutCreateInfo layoutInfo = vkinit::pipeline_layout_create_info();
|
||||
|
||||
@@ -7,6 +7,22 @@ inline constexpr bool kUseValidationLayers = false;
|
||||
inline constexpr bool kUseValidationLayers = true;
|
||||
#endif
|
||||
|
||||
// VMA diagnostics (stats prints + JSON dumps + allocation naming)
|
||||
// - Default: disabled to avoid noise and I/O at shutdown.
|
||||
// - Enable at runtime by setting environment variable `VE_VMA_DEBUG=1`.
|
||||
#include <cstdlib>
|
||||
inline constexpr bool kEnableVmaDebugByDefault = false;
|
||||
inline bool vmaDebugEnabled()
|
||||
{
|
||||
const char *env = std::getenv("VE_VMA_DEBUG");
|
||||
if (env && *env)
|
||||
{
|
||||
// Accept 1/true/yes (case-insensitive)
|
||||
return (*env == '1') || (*env == 'T') || (*env == 't') || (*env == 'Y') || (*env == 'y');
|
||||
}
|
||||
return kEnableVmaDebugByDefault;
|
||||
}
|
||||
|
||||
// Shadow mapping configuration
|
||||
inline constexpr int kShadowCascadeCount = 4;
|
||||
// Maximum shadow distance for CSM in view-space units
|
||||
@@ -22,9 +38,9 @@ inline constexpr float kShadowCascadeRadiusMargin = 10.0f;
|
||||
inline constexpr float kShadowClipBaseRadius = 20.0f;
|
||||
// When using dynamic pullback, compute it from the covered XY range of each level.
|
||||
// pullback = max(kShadowClipPullbackMin, cover * kShadowClipPullbackFactor)
|
||||
inline constexpr float kShadowClipPullbackFactor = 2.5f; // fraction of XY half-size behind center
|
||||
inline constexpr float kShadowClipForwardFactor = 2.5f; // fraction of XY half-size in front of center for zFar
|
||||
inline constexpr float kShadowClipPullbackMin = 160.0f; // lower bound on pullback so near levels don't collapse
|
||||
inline constexpr float kShadowClipPullbackFactor = 1.5f; // fraction of XY half-size behind center
|
||||
inline constexpr float kShadowClipForwardFactor = 1.5f; // fraction of XY half-size in front of center for zFar
|
||||
inline constexpr float kShadowClipPullbackMin = 40.0f; // lower bound on pullback so near levels don't collapse
|
||||
// Additional Z padding for the orthographic frustum along light direction
|
||||
inline constexpr float kShadowClipZPadding = 40.0f;
|
||||
|
||||
|
||||
@@ -9,7 +9,9 @@ void DescriptorManager::init(DeviceManager *deviceManager)
|
||||
{
|
||||
DescriptorLayoutBuilder builder;
|
||||
builder.add_binding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
|
||||
_singleImageDescriptorLayout = builder.build(_deviceManager->device(), VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
_singleImageDescriptorLayout = builder.build(
|
||||
_deviceManager->device(), VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
|
||||
} {
|
||||
DescriptorLayoutBuilder builder;
|
||||
builder.add_binding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
||||
@@ -19,7 +21,8 @@ void DescriptorManager::init(DeviceManager *deviceManager)
|
||||
builder.add_binding(1, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
|
||||
}
|
||||
_gpuSceneDataDescriptorLayout = builder.build(
|
||||
_deviceManager->device(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
_deviceManager->device(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -77,10 +77,13 @@ void DescriptorWriter::write_image(int binding, VkImageView image, VkSampler sam
|
||||
|
||||
void DescriptorWriter::write_acceleration_structure(int binding, VkAccelerationStructureKHR as)
|
||||
{
|
||||
// Store the handle to ensure the pointer we give to Vulkan stays valid
|
||||
VkAccelerationStructureKHR &storedAS = accelHandles.emplace_back(as);
|
||||
|
||||
VkWriteDescriptorSetAccelerationStructureKHR &acc = accelInfos.emplace_back(
|
||||
VkWriteDescriptorSetAccelerationStructureKHR{ VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR });
|
||||
acc.accelerationStructureCount = 1;
|
||||
acc.pAccelerationStructures = &as;
|
||||
acc.pAccelerationStructures = &storedAS;
|
||||
|
||||
VkWriteDescriptorSet write{ VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET };
|
||||
write.dstBinding = binding;
|
||||
@@ -95,6 +98,8 @@ void DescriptorWriter::clear()
|
||||
imageInfos.clear();
|
||||
writes.clear();
|
||||
bufferInfos.clear();
|
||||
accelInfos.clear();
|
||||
accelHandles.clear();
|
||||
}
|
||||
|
||||
void DescriptorWriter::update_set(VkDevice device, VkDescriptorSet set)
|
||||
@@ -118,7 +123,10 @@ void DescriptorAllocator::init_pool(VkDevice device, uint32_t maxSets, std::span
|
||||
}
|
||||
|
||||
VkDescriptorPoolCreateInfo pool_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
||||
pool_info.flags = 0;
|
||||
// Enable update-after-bind so descriptors used by previous frame can be
|
||||
// safely rewritten (e.g., compute instances). It is valid to allocate
|
||||
// non-update-after-bind sets from such a pool.
|
||||
pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT;
|
||||
pool_info.maxSets = maxSets;
|
||||
pool_info.poolSizeCount = (uint32_t) poolSizes.size();
|
||||
pool_info.pPoolSizes = poolSizes.data();
|
||||
@@ -187,7 +195,8 @@ VkDescriptorPool DescriptorAllocatorGrowable::create_pool(VkDevice device, uint3
|
||||
|
||||
VkDescriptorPoolCreateInfo pool_info = {};
|
||||
pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
||||
pool_info.flags = 0;
|
||||
// Use update-after-bind pools to support cross-frame rewrites.
|
||||
pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT;
|
||||
pool_info.maxSets = setCount;
|
||||
pool_info.poolSizeCount = (uint32_t) poolSizes.size();
|
||||
pool_info.pPoolSizes = poolSizes.data();
|
||||
|
||||
@@ -20,6 +20,8 @@ struct DescriptorWriter
|
||||
std::deque<VkDescriptorImageInfo> imageInfos;
|
||||
std::deque<VkDescriptorBufferInfo> bufferInfos;
|
||||
std::deque<VkWriteDescriptorSetAccelerationStructureKHR> accelInfos;
|
||||
// Keep AS handles alive so pAccelerationStructures points to valid memory
|
||||
std::deque<VkAccelerationStructureKHR> accelHandles;
|
||||
std::vector<VkWriteDescriptorSet> writes;
|
||||
|
||||
void write_image(int binding, VkImageView image, VkSampler sampler, VkImageLayout layout, VkDescriptorType type);
|
||||
|
||||
@@ -30,8 +30,16 @@ void DeviceManager::init_vulkan(SDL_Window *window)
|
||||
features.synchronization2 = true;
|
||||
|
||||
VkPhysicalDeviceVulkan12Features features12{.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
|
||||
features12.bufferDeviceAddress = true;
|
||||
features12.descriptorIndexing = true;
|
||||
features12.bufferDeviceAddress = VK_TRUE;
|
||||
features12.descriptorIndexing = VK_TRUE;
|
||||
// Enable update-after-bind related toggles for graphics/compute descriptors
|
||||
features12.descriptorBindingPartiallyBound = VK_TRUE;
|
||||
features12.descriptorBindingUpdateUnusedWhilePending = VK_TRUE;
|
||||
features12.runtimeDescriptorArray = VK_TRUE;
|
||||
features12.descriptorBindingUniformBufferUpdateAfterBind = VK_TRUE;
|
||||
features12.descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE;
|
||||
features12.descriptorBindingSampledImageUpdateAfterBind = VK_TRUE;
|
||||
features12.descriptorBindingStorageImageUpdateAfterBind = VK_TRUE;
|
||||
|
||||
//use vkbootstrap to select a gpu.
|
||||
//We want a gpu that can write to the SDL surface and supports vulkan 1.3
|
||||
@@ -72,14 +80,16 @@ void DeviceManager::init_vulkan(SDL_Window *window)
|
||||
//create the final vulkan device
|
||||
vkb::DeviceBuilder deviceBuilder{physicalDevice};
|
||||
|
||||
// Enable ray query + accel struct features in device create pNext if supported
|
||||
// Ray features are optional and enabled only if supported on the chosen GPU
|
||||
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR };
|
||||
VkPhysicalDeviceRayQueryFeaturesKHR rayqReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR };
|
||||
if (_rayQuerySupported && _accelStructSupported)
|
||||
{
|
||||
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR };
|
||||
accelReq.accelerationStructure = VK_TRUE;
|
||||
VkPhysicalDeviceRayQueryFeaturesKHR rayqReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR };
|
||||
rayqReq.pNext = &accelReq;
|
||||
rayqReq.rayQuery = VK_TRUE;
|
||||
rayqReq.pNext = &accelReq;
|
||||
}
|
||||
if (_rayQuerySupported && _accelStructSupported) {
|
||||
deviceBuilder.add_pNext(&rayqReq);
|
||||
}
|
||||
|
||||
@@ -111,6 +121,18 @@ void DeviceManager::init_vulkan(SDL_Window *window)
|
||||
|
||||
void DeviceManager::cleanup()
|
||||
{
|
||||
// Optional VMA stats print
|
||||
if (_allocator && vmaDebugEnabled())
|
||||
{
|
||||
VmaTotalStatistics stats{};
|
||||
vmaCalculateStatistics(_allocator, &stats);
|
||||
const VmaStatistics& s = stats.total.statistics;
|
||||
fmt::print("[VMA] Blocks: {} | Allocations: {} | BlockBytes: {} | AllocationBytes: {}\n",
|
||||
(size_t)s.blockCount,
|
||||
(size_t)s.allocationCount,
|
||||
(unsigned long long)s.blockBytes,
|
||||
(unsigned long long)s.allocationBytes);
|
||||
}
|
||||
vkDestroySurfaceKHR(_instance, _surface, nullptr);
|
||||
_deletionQueue.flush();
|
||||
vkDestroyDevice(_device, nullptr);
|
||||
|
||||
@@ -53,6 +53,46 @@
|
||||
|
||||
VulkanEngine *loadedEngine = nullptr;
|
||||
|
||||
/// Prints a one-line summary of the VMA allocator totals, prefixed with `tag`.
///
/// Does nothing when VMA diagnostics are disabled (see vmaDebugEnabled()),
/// when `dev` is null, or when the device manager has no allocator.
///
/// @param dev  Device manager whose allocator is queried; may be null.
/// @param tag  Label inserted into the output line to identify the call site.
static void print_vma_stats(DeviceManager* dev, const char* tag)
{
    if (!vmaDebugEnabled() || dev == nullptr)
    {
        return;
    }

    VmaAllocator allocator = dev->allocator();
    if (!allocator)
    {
        return;
    }

    VmaTotalStatistics totals{};
    vmaCalculateStatistics(allocator, &totals);

    const VmaStatistics &overall = totals.total.statistics;
    fmt::print("[VMA][{}] Blocks:{} Allocs:{} BlockBytes:{} AllocBytes:{}\n",
               tag,
               static_cast<size_t>(overall.blockCount),
               static_cast<size_t>(overall.allocationCount),
               static_cast<unsigned long long>(overall.blockBytes),
               static_cast<unsigned long long>(overall.allocationBytes));
}
|
||||
|
||||
/// Writes the VMA statistics string to the file `vma_<tag>.json`.
///
/// The file name is relative, so the file lands in the process's current
/// working directory. This is best-effort debugging output: if the file cannot
/// be opened nothing is written, and the result of fwrite is not checked.
/// Does nothing when VMA diagnostics are disabled (see vmaDebugEnabled()),
/// when `dev` is null, or when the device manager has no allocator.
///
/// @param dev  Device manager whose allocator is dumped; may be null.
/// @param tag  Suffix used to build the output file name.
static void dump_vma_json(DeviceManager* dev, const char* tag)
{
    if (!vmaDebugEnabled() || !dev)
    {
        return;
    }

    VmaAllocator allocator = dev->allocator();
    if (!allocator)
    {
        return;
    }

    char* statsJson = nullptr;
    vmaBuildStatsString(allocator, &statsJson, VK_TRUE);
    if (!statsJson)
    {
        return;
    }

    std::string path = "vma_";
    path += tag;
    path += ".json";

    if (FILE* out = fopen(path.c_str(), "wb"))
    {
        fwrite(statsJson, 1, strlen(statsJson), out);
        fclose(out);
        fmt::print("[VMA] Wrote {}\n", path);
    }

    // The string is owned by VMA and must be released through it,
    // whether or not the file could be written.
    vmaFreeStatsString(allocator, statsJson);
}
|
||||
|
||||
void VulkanEngine::init()
|
||||
{
|
||||
// We initialize SDL and create a window with it.
|
||||
@@ -150,7 +190,7 @@ void VulkanEngine::init()
|
||||
auto imguiPass = std::make_unique<ImGuiPass>();
|
||||
_renderPassManager->setImGuiPass(std::move(imguiPass));
|
||||
|
||||
const std::string structurePath = _assetManager->modelPath("police_office.glb");
|
||||
const std::string structurePath = _assetManager->modelPath("seoul_high.glb");
|
||||
const auto structureFile = _assetManager->loadGLTF(structurePath);
|
||||
|
||||
assert(structureFile.has_value());
|
||||
@@ -233,7 +273,11 @@ void VulkanEngine::cleanup()
|
||||
{
|
||||
vkDeviceWaitIdle(_deviceManager->device());
|
||||
|
||||
print_vma_stats(_deviceManager.get(), "begin");
|
||||
|
||||
_sceneManager->cleanup();
|
||||
print_vma_stats(_deviceManager.get(), "after SceneManager");
|
||||
dump_vma_json(_deviceManager.get(), "after_SceneManager");
|
||||
|
||||
if (_isInitialized)
|
||||
{
|
||||
@@ -253,24 +297,53 @@ void VulkanEngine::cleanup()
|
||||
metalRoughMaterial.clear_resources(_deviceManager->device());
|
||||
|
||||
_mainDeletionQueue.flush();
|
||||
print_vma_stats(_deviceManager.get(), "after MainDQ flush");
|
||||
dump_vma_json(_deviceManager.get(), "after_MainDQ");
|
||||
|
||||
_renderPassManager->cleanup();
|
||||
print_vma_stats(_deviceManager.get(), "after RenderPassManager");
|
||||
dump_vma_json(_deviceManager.get(), "after_RenderPassManager");
|
||||
|
||||
_pipelineManager->cleanup();
|
||||
print_vma_stats(_deviceManager.get(), "after PipelineManager");
|
||||
dump_vma_json(_deviceManager.get(), "after_PipelineManager");
|
||||
|
||||
compute.cleanup();
|
||||
print_vma_stats(_deviceManager.get(), "after Compute");
|
||||
dump_vma_json(_deviceManager.get(), "after_Compute");
|
||||
|
||||
_swapchainManager->cleanup();
|
||||
print_vma_stats(_deviceManager.get(), "after Swapchain");
|
||||
dump_vma_json(_deviceManager.get(), "after_Swapchain");
|
||||
|
||||
if (_assetManager) _assetManager->cleanup();
|
||||
print_vma_stats(_deviceManager.get(), "after AssetManager");
|
||||
dump_vma_json(_deviceManager.get(), "after_AssetManager");
|
||||
|
||||
// Ensure ray tracing resources (BLAS/TLAS/instance buffers) are freed before VMA is destroyed
|
||||
if (_rayManager) { _rayManager->cleanup(); }
|
||||
print_vma_stats(_deviceManager.get(), "after RTManager");
|
||||
dump_vma_json(_deviceManager.get(), "after_RTManager");
|
||||
|
||||
_resourceManager->cleanup();
|
||||
print_vma_stats(_deviceManager.get(), "after ResourceManager");
|
||||
dump_vma_json(_deviceManager.get(), "after_ResourceManager");
|
||||
|
||||
_samplerManager->cleanup();
|
||||
_descriptorManager->cleanup();
|
||||
print_vma_stats(_deviceManager.get(), "after Samplers+Descriptors");
|
||||
dump_vma_json(_deviceManager.get(), "after_Samplers_Descriptors");
|
||||
|
||||
_context->descriptors->destroy_pools(_deviceManager->device());
|
||||
|
||||
// Extra safety: flush frame deletion queues once more before destroying VMA
|
||||
for (int i = 0; i < FRAME_OVERLAP; i++)
|
||||
{
|
||||
_frames[i]._deletionQueue.flush();
|
||||
}
|
||||
|
||||
print_vma_stats(_deviceManager.get(), "before DeviceManager");
|
||||
dump_vma_json(_deviceManager.get(), "before_DeviceManager");
|
||||
_deviceManager->cleanup();
|
||||
|
||||
SDL_DestroyWindow(_window);
|
||||
@@ -280,11 +353,6 @@ void VulkanEngine::cleanup()
|
||||
void VulkanEngine::draw()
|
||||
{
|
||||
_sceneManager->update_scene();
|
||||
// Build or update TLAS for current frame if RT mode enabled (1 or 2)
|
||||
if (_rayManager && _context->shadowSettings.mode != 0u)
|
||||
{
|
||||
_rayManager->buildTLASFromDrawContext(_context->getMainDrawContext());
|
||||
}
|
||||
//> frame_clear
|
||||
//wait until the gpu has finished rendering the last frame. Timeout of 1 second
|
||||
VK_CHECK(vkWaitForFences(_deviceManager->device(), 1, &get_current_frame()._renderFence, true, 1000000000));
|
||||
@@ -319,6 +387,12 @@ void VulkanEngine::draw()
|
||||
//now that we are sure that the commands finished executing, we can safely reset the command buffer to begin recording again.
|
||||
VK_CHECK(vkResetCommandBuffer(get_current_frame()._mainCommandBuffer, 0));
|
||||
|
||||
// Build or update TLAS for current frame now that the previous frame is idle
|
||||
if (_rayManager && _context->shadowSettings.mode != 0u)
|
||||
{
|
||||
_rayManager->buildTLASFromDrawContext(_context->getMainDrawContext(), get_current_frame()._deletionQueue);
|
||||
}
|
||||
|
||||
//naming it cmd for shorter writing
|
||||
VkCommandBuffer cmd = get_current_frame()._mainCommandBuffer;
|
||||
|
||||
|
||||
@@ -21,6 +21,12 @@ void RayTracingManager::init(DeviceManager *dev, ResourceManager *res)
|
||||
vkGetDeviceProcAddr(_device->device(), "vkCmdBuildAccelerationStructuresKHR"));
|
||||
_vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(
|
||||
vkGetDeviceProcAddr(_device->device(), "vkGetAccelerationStructureDeviceAddressKHR"));
|
||||
|
||||
// Query AS properties for scratch alignment
|
||||
VkPhysicalDeviceAccelerationStructurePropertiesKHR asProps{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR };
|
||||
VkPhysicalDeviceProperties2 props2{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, &asProps };
|
||||
vkGetPhysicalDeviceProperties2(_device->physicalDevice(), &props2);
|
||||
_minScratchAlignment = std::max<VkDeviceSize>(asProps.minAccelerationStructureScratchOffsetAlignment, 256);
|
||||
}
|
||||
|
||||
void RayTracingManager::cleanup()
|
||||
@@ -150,11 +156,15 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
|
||||
asci.size = sizes.accelerationStructureSize;
|
||||
VK_CHECK(_vkCreateAccelerationStructureKHR(_device->device(), &asci, nullptr, &blas.handle));
|
||||
|
||||
AllocatedBuffer scratch = _resources->create_buffer(sizes.buildScratchSize,
|
||||
// Allocate scratch with padding to satisfy alignment requirements
|
||||
const VkDeviceSize align = _minScratchAlignment;
|
||||
const VkDeviceSize padded = sizes.buildScratchSize + (align - 1);
|
||||
AllocatedBuffer scratch = _resources->create_buffer(padded,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
VMA_MEMORY_USAGE_GPU_ONLY);
|
||||
VkDeviceAddress scratchAddr = get_buffer_address(_device->device(), scratch.buffer);
|
||||
VkDeviceAddress scratchBase = get_buffer_address(_device->device(), scratch.buffer);
|
||||
VkDeviceAddress scratchAddr = (scratchBase + (align - 1)) & ~VkDeviceAddress(align - 1);
|
||||
|
||||
buildInfo.dstAccelerationStructure = blas.handle;
|
||||
buildInfo.scratchData.deviceAddress = scratchAddr;
|
||||
@@ -178,18 +188,20 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
|
||||
return blas;
|
||||
}
|
||||
|
||||
void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize /*requiredScratch*/)
|
||||
void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize /*requiredScratch*/, DeletionQueue& dq)
|
||||
{
|
||||
// Simple: recreate TLAS storage if size grows
|
||||
if (_tlas.handle)
|
||||
// Recreate TLAS storage if size grows. Defer destruction to the frame DQ to
|
||||
// avoid freeing while referenced by in-flight frames.
|
||||
if (_tlas.handle || _tlas.storage.buffer)
|
||||
{
|
||||
_vkDestroyAccelerationStructureKHR(_device->device(), _tlas.handle, nullptr);
|
||||
_tlas.handle = VK_NULL_HANDLE;
|
||||
}
|
||||
if (_tlas.storage.buffer)
|
||||
{
|
||||
_resources->destroy_buffer(_tlas.storage);
|
||||
_tlas.storage = {};
|
||||
AccelStructureHandle old = _tlas;
|
||||
dq.push_function([this, old]() {
|
||||
if (old.handle)
|
||||
_vkDestroyAccelerationStructureKHR(_device->device(), old.handle, nullptr);
|
||||
if (old.storage.buffer)
|
||||
_resources->destroy_buffer(old.storage);
|
||||
});
|
||||
_tlas = {};
|
||||
}
|
||||
_tlas.storage = _resources->create_buffer(requiredASSize,
|
||||
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
|
||||
@@ -203,7 +215,7 @@ void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDevic
|
||||
VK_CHECK(_vkCreateAccelerationStructureKHR(_device->device(), &asci, nullptr, &_tlas.handle));
|
||||
}
|
||||
|
||||
VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const DrawContext &dc)
|
||||
VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const DrawContext &dc, DeletionQueue& dq)
|
||||
{
|
||||
// Collect instances; one per render object (opaque only).
|
||||
std::vector<VkAccelerationStructureInstanceKHR> instances;
|
||||
@@ -239,8 +251,19 @@ VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const Dra
|
||||
|
||||
if (instances.empty())
|
||||
{
|
||||
// nothing to build
|
||||
return _tlas.handle;
|
||||
// No instances this frame: defer TLAS destruction to avoid racing with previous frames
|
||||
if (_tlas.handle || _tlas.storage.buffer)
|
||||
{
|
||||
AccelStructureHandle old = _tlas;
|
||||
dq.push_function([this, old]() {
|
||||
if (old.handle)
|
||||
_vkDestroyAccelerationStructureKHR(_device->device(), old.handle, nullptr);
|
||||
if (old.storage.buffer)
|
||||
_resources->destroy_buffer(old.storage);
|
||||
});
|
||||
_tlas = {};
|
||||
}
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
// Ensure instance buffer capacity
|
||||
@@ -293,15 +316,18 @@ VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const Dra
|
||||
_vkGetAccelerationStructureBuildSizesKHR(_device->device(), VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
|
||||
&buildInfo, &primCount, &sizes);
|
||||
|
||||
ensure_tlas_storage(sizes.accelerationStructureSize, sizes.buildScratchSize);
|
||||
ensure_tlas_storage(sizes.accelerationStructureSize, sizes.buildScratchSize, dq);
|
||||
|
||||
buildInfo.dstAccelerationStructure = _tlas.handle;
|
||||
AllocatedBuffer scratch = _resources->create_buffer(sizes.buildScratchSize,
|
||||
const VkDeviceSize align2 = _minScratchAlignment;
|
||||
const VkDeviceSize padded2 = sizes.buildScratchSize + (align2 - 1);
|
||||
AllocatedBuffer scratch = _resources->create_buffer(padded2,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
VMA_MEMORY_USAGE_GPU_ONLY);
|
||||
VkDeviceAddress scratchAddr = get_buffer_address(_device->device(), scratch.buffer);
|
||||
buildInfo.scratchData.deviceAddress = scratchAddr;
|
||||
VkDeviceAddress scratchBase2 = get_buffer_address(_device->device(), scratch.buffer);
|
||||
VkDeviceAddress scratchAddr2 = (scratchBase2 + (align2 - 1)) & ~VkDeviceAddress(align2 - 1);
|
||||
buildInfo.scratchData.deviceAddress = scratchAddr2;
|
||||
|
||||
VkAccelerationStructureBuildRangeInfoKHR range{};
|
||||
range.primitiveCount = primCount;
|
||||
|
||||
@@ -25,8 +25,9 @@ public:
|
||||
// Build (or get) BLAS for a mesh. Safe to call multiple times.
|
||||
AccelStructureHandle getOrBuildBLAS(const std::shared_ptr<MeshAsset>& mesh);
|
||||
|
||||
// Rebuild TLAS from current draw context; returns TLAS handle (or null if unavailable)
|
||||
VkAccelerationStructureKHR buildTLASFromDrawContext(const DrawContext& dc);
|
||||
// Rebuild TLAS from current draw context; returns TLAS handle (or null if unavailable)
|
||||
// Destruction of previous TLAS resources is deferred via the provided frame deletion queue
|
||||
VkAccelerationStructureKHR buildTLASFromDrawContext(const DrawContext& dc, DeletionQueue& frameDQ);
|
||||
VkAccelerationStructureKHR tlas() const { return _tlas.handle; }
|
||||
VkDeviceAddress tlasAddress() const { return _tlas.deviceAddress; }
|
||||
|
||||
@@ -34,7 +35,7 @@ public:
|
||||
// Safe to call even if no BLAS exists for the buffer.
|
||||
void removeBLASForBuffer(VkBuffer vertexBuffer);
|
||||
|
||||
private:
|
||||
private:
|
||||
// function pointers (resolved on init)
|
||||
PFN_vkCreateAccelerationStructureKHR _vkCreateAccelerationStructureKHR{};
|
||||
PFN_vkDestroyAccelerationStructureKHR _vkDestroyAccelerationStructureKHR{};
|
||||
@@ -42,17 +43,20 @@ public:
|
||||
PFN_vkCmdBuildAccelerationStructuresKHR _vkCmdBuildAccelerationStructuresKHR{};
|
||||
PFN_vkGetAccelerationStructureDeviceAddressKHR _vkGetAccelerationStructureDeviceAddressKHR{};
|
||||
|
||||
DeviceManager* _device{nullptr};
|
||||
ResourceManager* _resources{nullptr};
|
||||
DeviceManager* _device{nullptr};
|
||||
ResourceManager* _resources{nullptr};
|
||||
|
||||
// BLAS cache by vertex buffer handle
|
||||
std::unordered_map<VkBuffer, AccelStructureHandle> _blasByVB;
|
||||
|
||||
// TLAS + scratch / instance buffer (rebuilt per frame)
|
||||
AccelStructureHandle _tlas{};
|
||||
AllocatedBuffer _tlasInstanceBuffer{};
|
||||
size_t _tlasInstanceCapacity{0};
|
||||
// TLAS + scratch / instance buffer (rebuilt per frame)
|
||||
AccelStructureHandle _tlas{};
|
||||
AllocatedBuffer _tlasInstanceBuffer{};
|
||||
size_t _tlasInstanceCapacity{0};
|
||||
|
||||
// Properties
|
||||
VkDeviceSize _minScratchAlignment{256};
|
||||
|
||||
void ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize requiredScratch);
|
||||
};
|
||||
void ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize requiredScratch, DeletionQueue& frameDQ);
|
||||
};
|
||||
|
||||
|
||||
@@ -809,13 +809,16 @@ void RenderGraph::add_present_chain(RGImageHandle sourceDraw,
|
||||
|
||||
RGImageHandle RenderGraph::import_draw_image()
|
||||
{
|
||||
RGImportedImageDesc d{};
|
||||
d.name = "drawImage";
|
||||
d.image = _context->getSwapchain()->drawImage().image;
|
||||
d.imageView = _context->getSwapchain()->drawImage().imageView;
|
||||
d.format = _context->getSwapchain()->drawImage().imageFormat;
|
||||
d.extent = _context->getDrawExtent();
|
||||
d.currentLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
RGImportedImageDesc d{};
|
||||
d.name = "drawImage";
|
||||
d.image = _context->getSwapchain()->drawImage().image;
|
||||
d.imageView = _context->getSwapchain()->drawImage().imageView;
|
||||
d.format = _context->getSwapchain()->drawImage().imageFormat;
|
||||
d.extent = _context->getDrawExtent();
|
||||
// Treat layout as unknown at frame start to force an explicit barrier
|
||||
// into the first declared usage (compute write / color attach). This
|
||||
// avoids mismatches when the previous frame ended in a different layout.
|
||||
d.currentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
return import_image(d);
|
||||
}
|
||||
|
||||
@@ -942,8 +945,10 @@ RGImageHandle RenderGraph::import_swapchain_image(uint32_t index)
|
||||
d.imageView = views[index];
|
||||
d.format = _context->getSwapchain()->swapchainImageFormat();
|
||||
d.extent = _context->getSwapchain()->swapchainExtent();
|
||||
d.currentLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
return import_image(d);
|
||||
// On first use after swapchain creation, images are in UNDEFINED layout.
|
||||
// Start from UNDEFINED so the graph inserts the necessary transition.
|
||||
d.currentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
return import_image(d);
|
||||
}
|
||||
|
||||
void RenderGraph::resolve_timings()
|
||||
@@ -960,7 +965,7 @@ void RenderGraph::resolve_timings()
|
||||
_context->getDevice()->device(), _timestampPool,
|
||||
0, queryCount,
|
||||
sizeof(uint64_t) * results.size(), results.data(), sizeof(uint64_t),
|
||||
VK_QUERY_RESULT_64_BIT);
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
// Convert ticks to ms
|
||||
VkPhysicalDeviceProperties props{};
|
||||
vkGetPhysicalDeviceProperties(_context->getDevice()->physicalDevice(), &props);
|
||||
@@ -983,6 +988,8 @@ void RenderGraph::resolve_timings()
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure any pending work that might still reference the pool is complete
|
||||
vkQueueWaitIdle(_context->getDevice()->graphicsQueue());
|
||||
vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr);
|
||||
_timestampPool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
#include <render/rg_resources.h>
|
||||
#include <core/engine_context.h>
|
||||
#include <core/vk_resource.h>
|
||||
#include <vk_mem_alloc.h>
|
||||
#include <core/config.h>
|
||||
|
||||
#include "frame_resources.h"
|
||||
#include "vk_device.h"
|
||||
|
||||
void RGResourceRegistry::reset()
|
||||
{
|
||||
@@ -53,7 +56,13 @@ RGImageHandle RGResourceRegistry::add_transient(const RGImageDesc& d)
|
||||
rec.creationUsage = d.usage;
|
||||
|
||||
VkExtent3D size{ d.extent.width, d.extent.height, 1 };
|
||||
rec.allocation = _ctx->getResources()->create_image(size, d.format, d.usage);
|
||||
rec.allocation = _ctx->getResources()->create_image(size, d.format, d.usage);
|
||||
// Name the allocation for diagnostics (optional)
|
||||
if (vmaDebugEnabled() && _ctx && _ctx->getDevice())
|
||||
{
|
||||
std::string nm = std::string("rg.image:") + d.name;
|
||||
vmaSetAllocationName(_ctx->getDevice()->allocator(), rec.allocation.allocation, nm.c_str());
|
||||
}
|
||||
rec.image = rec.allocation.image;
|
||||
rec.imageView = rec.allocation.imageView;
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ void GLTFMetallic_Roughness::build_pipelines(VulkanEngine *engine)
|
||||
layoutBuilder.add_binding(2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
|
||||
|
||||
materialLayout = layoutBuilder.build(engine->_deviceManager->device(),
|
||||
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
|
||||
|
||||
VkDescriptorSetLayout layouts[] = {
|
||||
engine->_descriptorManager->gpuSceneDataLayout(),
|
||||
|
||||
@@ -94,6 +94,6 @@ void BackgroundPass::cleanup()
|
||||
_context->pipelines->destroyComputePipeline("gradient");
|
||||
_context->pipelines->destroyComputePipeline("sky");
|
||||
}
|
||||
fmt::print("RenderPassManager::cleanup()\n");
|
||||
fmt::print("BackgroundPass::cleanup()\n");
|
||||
_backgroundEffects.clear();
|
||||
}
|
||||
|
||||
@@ -30,7 +30,9 @@ void LightingPass::init(EngineContext *context)
|
||||
builder.add_binding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
|
||||
builder.add_binding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
|
||||
builder.add_binding(2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
|
||||
_gBufferInputDescriptorLayout = builder.build(_context->getDevice()->device(), VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
_gBufferInputDescriptorLayout = builder.build(
|
||||
_context->getDevice()->device(), VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
|
||||
}
|
||||
|
||||
// Allocate and write GBuffer descriptor set
|
||||
@@ -51,21 +53,22 @@ void LightingPass::init(EngineContext *context)
|
||||
{
|
||||
DescriptorLayoutBuilder builder;
|
||||
builder.add_binding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, kShadowCascadeCount);
|
||||
_shadowDescriptorLayout = builder.build(_context->getDevice()->device(), VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
_shadowDescriptorLayout = builder.build(
|
||||
_context->getDevice()->device(), VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
|
||||
}
|
||||
|
||||
// Build lighting pipeline through PipelineManager
|
||||
// Build lighting pipelines (RT and non-RT) through PipelineManager
|
||||
VkDescriptorSetLayout layouts[] = {
|
||||
_context->getDescriptorLayouts()->gpuSceneDataLayout(),
|
||||
_gBufferInputDescriptorLayout,
|
||||
_shadowDescriptorLayout
|
||||
};
|
||||
|
||||
GraphicsPipelineCreateInfo info{};
|
||||
info.vertexShaderPath = _context->getAssets()->shaderPath("fullscreen.vert.spv");
|
||||
info.fragmentShaderPath = _context->getAssets()->shaderPath("deferred_lighting.frag.spv");
|
||||
info.setLayouts.assign(std::begin(layouts), std::end(layouts));
|
||||
info.configure = [this](PipelineBuilder &b) {
|
||||
GraphicsPipelineCreateInfo baseInfo{};
|
||||
baseInfo.vertexShaderPath = _context->getAssets()->shaderPath("fullscreen.vert.spv");
|
||||
baseInfo.setLayouts.assign(std::begin(layouts), std::end(layouts));
|
||||
baseInfo.configure = [this](PipelineBuilder &b) {
|
||||
b.set_input_topology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
|
||||
b.set_polygon_mode(VK_POLYGON_MODE_FILL);
|
||||
b.set_cull_mode(VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE);
|
||||
@@ -74,13 +77,16 @@ void LightingPass::init(EngineContext *context)
|
||||
b.disable_depthtest();
|
||||
b.set_color_attachment_format(_context->getSwapchain()->drawImage().imageFormat);
|
||||
};
|
||||
_context->pipelines->createGraphicsPipeline("deferred_lighting", info);
|
||||
|
||||
// fetch the handles so current frame uses latest versions
|
||||
MaterialPipeline mp{};
|
||||
_context->pipelines->getMaterialPipeline("deferred_lighting", mp);
|
||||
_pipeline = mp.pipeline;
|
||||
_pipelineLayout = mp.layout;
|
||||
// Non-RT variant (no TLAS required)
|
||||
auto infoNoRT = baseInfo;
|
||||
infoNoRT.fragmentShaderPath = _context->getAssets()->shaderPath("deferred_lighting_nort.frag.spv");
|
||||
_context->pipelines->createGraphicsPipeline("deferred_lighting.nort", infoNoRT);
|
||||
|
||||
// RT variant (requires GL_EXT_ray_query and TLAS bound at set=0,binding=1)
|
||||
auto infoRT = baseInfo;
|
||||
infoRT.fragmentShaderPath = _context->getAssets()->shaderPath("deferred_lighting.frag.spv");
|
||||
_context->pipelines->createGraphicsPipeline("deferred_lighting.rt", infoRT);
|
||||
|
||||
_deletionQueue.push_function([&]() {
|
||||
// Pipelines are owned by PipelineManager; only destroy our local descriptor set layout
|
||||
@@ -145,8 +151,20 @@ void LightingPass::draw_lighting(VkCommandBuffer cmd,
|
||||
VkImageView drawView = resources.image_view(drawHandle);
|
||||
if (drawView == VK_NULL_HANDLE) return;
|
||||
|
||||
// Re-fetch pipeline in case it was hot-reloaded
|
||||
pipelineManager->getGraphics("deferred_lighting", _pipeline, _pipelineLayout);
|
||||
// Choose RT only if TLAS is valid; otherwise fall back to non-RT.
|
||||
const bool haveRTFeatures = ctxLocal->getDevice()->supportsAccelerationStructure();
|
||||
const VkAccelerationStructureKHR tlas = (ctxLocal->ray ? ctxLocal->ray->tlas() : VK_NULL_HANDLE);
|
||||
const VkDeviceAddress tlasAddr = (ctxLocal->ray ? ctxLocal->ray->tlasAddress() : 0);
|
||||
const bool useRT = haveRTFeatures && (ctxLocal->shadowSettings.mode != 0u) && (tlas != VK_NULL_HANDLE) && (tlasAddr != 0);
|
||||
|
||||
const char* pipeName = useRT ? "deferred_lighting.rt" : "deferred_lighting.nort";
|
||||
if (!pipelineManager->getGraphics(pipeName, _pipeline, _pipelineLayout))
|
||||
{
|
||||
// Try the other variant as a fallback
|
||||
const char* fallback = useRT ? "deferred_lighting.nort" : "deferred_lighting.rt";
|
||||
if (!pipelineManager->getGraphics(fallback, _pipeline, _pipelineLayout))
|
||||
return; // Neither pipeline is ready
|
||||
}
|
||||
|
||||
// Dynamic rendering is handled by the RenderGraph using the declared draw attachment.
|
||||
|
||||
@@ -168,14 +186,10 @@ void LightingPass::draw_lighting(VkCommandBuffer cmd,
|
||||
deviceManager->device(), descriptorLayouts->gpuSceneDataLayout());
|
||||
DescriptorWriter writer;
|
||||
writer.write_buffer(0, gpuSceneDataBuffer.buffer, sizeof(GPUSceneData), 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
||||
// If TLAS available and feature enabled, bind it at (set=0,binding=1)
|
||||
if (ctxLocal->ray && ctxLocal->getDevice()->supportsAccelerationStructure() && ctxLocal->shadowSettings.mode != 0u)
|
||||
// Only write TLAS when using the RT pipeline and we have a valid TLAS
|
||||
if (useRT)
|
||||
{
|
||||
VkAccelerationStructureKHR tlas = ctxLocal->ray->tlas();
|
||||
if (tlas != VK_NULL_HANDLE)
|
||||
{
|
||||
writer.write_acceleration_structure(1, tlas);
|
||||
}
|
||||
writer.write_acceleration_structure(1, tlas);
|
||||
}
|
||||
writer.update_set(deviceManager->device(), globalDescriptor);
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@ void ShadowPass::init(EngineContext *context)
|
||||
b.set_multisampling_none();
|
||||
b.disable_blending();
|
||||
|
||||
// Keep reverse-Z convention for shadow maps to match engine depth usage
|
||||
b.enable_depthtest(true, VK_COMPARE_OP_GREATER_OR_EQUAL);
|
||||
b.set_depth_format(VK_FORMAT_D32_SFLOAT);
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "render/vk_materials.h"
|
||||
#include "core/vk_initializers.h"
|
||||
#include "core/vk_types.h"
|
||||
#include "core/config.h"
|
||||
#include <glm/gtx/quaternion.hpp>
|
||||
|
||||
#include <fastgltf/glm_element_traits.hpp>
|
||||
@@ -42,6 +43,9 @@ std::optional<AllocatedImage> load_image(VulkanEngine *engine, fastgltf::Asset &
|
||||
VkFormat fmt = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
|
||||
newImage = engine->_resourceManager->create_image(
|
||||
data, imagesize, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, false);
|
||||
// Name the allocation for diagnostics
|
||||
if (vmaDebugEnabled())
|
||||
vmaSetAllocationName(engine->_deviceManager->allocator(), newImage.allocation, path.c_str());
|
||||
|
||||
stbi_image_free(data);
|
||||
}
|
||||
@@ -59,6 +63,8 @@ std::optional<AllocatedImage> load_image(VulkanEngine *engine, fastgltf::Asset &
|
||||
VkFormat fmt = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
|
||||
newImage = engine->_resourceManager->create_image(
|
||||
data, imagesize, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, false);
|
||||
if (vmaDebugEnabled())
|
||||
vmaSetAllocationName(engine->_deviceManager->allocator(), newImage.allocation, "gltf.vector.image");
|
||||
|
||||
stbi_image_free(data);
|
||||
}
|
||||
@@ -86,8 +92,10 @@ std::optional<AllocatedImage> load_image(VulkanEngine *engine, fastgltf::Asset &
|
||||
imagesize.depth = 1;
|
||||
|
||||
VkFormat fmt = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
|
||||
newImage = engine->_resourceManager->create_image(
|
||||
newImage = engine->_resourceManager->create_image(
|
||||
data, imagesize, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, false);
|
||||
if (vmaDebugEnabled())
|
||||
vmaSetAllocationName(engine->_deviceManager->allocator(), newImage.allocation, "gltf.bufferview.image");
|
||||
|
||||
stbi_image_free(data);
|
||||
}
|
||||
@@ -256,22 +264,33 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
|
||||
//< load_arrays
|
||||
|
||||
// load all textures
|
||||
for (fastgltf::Image &image: gltf.images)
|
||||
for (size_t i = 0; i < gltf.images.size(); ++i)
|
||||
{
|
||||
fastgltf::Image &image = gltf.images[i];
|
||||
// Default-load GLTF images as linear; baseColor is reloaded as sRGB when bound
|
||||
std::optional<AllocatedImage> img = load_image(engine, gltf, image, false);
|
||||
|
||||
if (img.has_value())
|
||||
{
|
||||
images.push_back(*img);
|
||||
file.images[image.name.c_str()] = *img;
|
||||
// Use a unique, stable key so every allocation is tracked and later freed.
|
||||
std::string key = image.name.empty() ? (std::string("gltf.image.") + std::to_string(i))
|
||||
: std::string(image.name.c_str());
|
||||
// Avoid accidental collisions from duplicate names
|
||||
int suffix = 1;
|
||||
while (file.images.find(key) != file.images.end())
|
||||
{
|
||||
key = (image.name.empty() ? std::string("gltf.image.") + std::to_string(i)
|
||||
: std::string(image.name.c_str())) + std::string("#") + std::to_string(suffix++);
|
||||
}
|
||||
file.images[key] = *img;
|
||||
}
|
||||
else
|
||||
{
|
||||
// we failed to load, so lets give the slot a default white texture to not
|
||||
// completely break loading
|
||||
images.push_back(engine->_errorCheckerboardImage);
|
||||
std::cout << "gltf failed to load texture " << image.name << std::endl;
|
||||
std::cout << "gltf failed to load texture index " << i << " (name='" << image.name << "')" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -198,6 +198,13 @@ std::shared_ptr<LoadedGLTF> SceneManager::getScene(const std::string &name)
|
||||
|
||||
void SceneManager::cleanup()
|
||||
{
|
||||
// Explicitly clear dynamic instances first to drop any extra shared_ptrs
|
||||
// that could keep GPU resources alive.
|
||||
clearMeshInstances();
|
||||
clearGLTFInstances();
|
||||
|
||||
// Drop our references to GLTF scenes. Their destructors call clearAll()
|
||||
// exactly once to release GPU resources.
|
||||
loadedScenes.clear();
|
||||
loadedNodes.clear();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user