FIX: Memory error fix, debug scheme

This commit is contained in:
2025-11-01 01:21:41 +09:00
parent 235d9b2f83
commit d5ff6263ee
18 changed files with 609 additions and 95 deletions

View File

@@ -0,0 +1,273 @@
#version 450
#extension GL_GOOGLE_include_directive : require
#include "input_structures.glsl"
// Interpolated UV used to look up every G-buffer texture in main().
layout(location=0) in vec2 inUV;
// Final lit color written by main().
layout(location=0) out vec4 outColor;
// G-buffer inputs (set 1). Channel usage as read by main():
//   posTex.xyz    = surface position fed to the light matrices (treated as world space);
//                   posTex.w == 0 marks a pixel with nothing to shade
//   normalTex.xyz = surface normal (re-normalized on read); normalTex.w = roughness
//   albedoTex.rgb = base color; albedoTex.a = metallic
layout(set=1, binding=0) uniform sampler2D posTex;
layout(set=1, binding=1) uniform sampler2D normalTex;
layout(set=1, binding=2) uniform sampler2D albedoTex;
// Shadow maps (set 2), indexed by cascade index in sampleCascadeShadow().
layout(set=2, binding=0) uniform sampler2D shadowTex[4];
// Tunables for shadow quality and blending
// Width of the cascade cross-fade band, measured inward from the cascade border
// in light-space NDC (compared against max(|x|, |y|)). Larger = wider cross-fade.
const float SHADOW_BORDER_SMOOTH_NDC = 0.08;
// Base PCF radius scale in texels for cascade 0; higher cascades add to this.
// It multiplies the POISSON_16 offsets, so the actual footprint is smaller than this value.
const float SHADOW_PCF_BASE_RADIUS = 1.35;
// Extra PCF radius (texels) added on top of the base, interpolated linearly by
// cascade index: nothing at cascade 0, the full amount at cascade 3.
const float SHADOW_PCF_CASCADE_GAIN = 2.0; // extra radius at far end
// Receiver normal-based offset to reduce acne (in world units)
const float SHADOW_NORMAL_OFFSET = 0.0025;
// Scale for receiver-plane depth bias term (tweak if over/under biased)
const float SHADOW_RPDB_SCALE = 1.0;
// Minimum clamp to keep a tiny bias even on perpendicular receivers
const float SHADOW_MIN_BIAS = 1e-5;
// Pi, used to normalize the diffuse and GGX terms.
const float PI = 3.14159265359;
// Hash a 2D coordinate into a pseudo-random float in [0, 1).
// Used to pick a per-pixel rotation angle for the PCF kernel.
float hash12(vec2 p)
{
    vec3 h = fract(p.xyx * 0.1031);
    float spread = dot(h, h.yzx + 33.33);
    h += spread;
    return fract((h.x + h.y) * h.z);
}
// 16 fixed 2D sample offsets for the PCF kernel (presumably a Poisson-disk set,
// per the name). Every offset has length below 0.5. sampleCascadeShadow()
// rotates them per pixel and scales them by the PCF radius and texel size.
const vec2 POISSON_16[16] = vec2[16](
vec2(0.2852, -0.1883), vec2(-0.1464, 0.2591),
vec2(-0.3651, -0.0974), vec2(0.0901, 0.3807),
vec2(0.4740, 0.0679), vec2(-0.0512, -0.4466),
vec2(-0.4497, 0.1673), vec2(0.3347, 0.3211),
vec2(0.1948, -0.4196), vec2(-0.2919, -0.3291),
vec2(-0.0763, 0.4661), vec2(0.4421, -0.2217),
vec2(0.0281, -0.2468), vec2(-0.2104, 0.0573),
vec2(0.1197, 0.0779), vec2(-0.0905, -0.1203)
);
// Cascade selection result produced by computeCascadeMix(): the primary cascade
// and an optional neighbor for cross-fade near borders.
//   i0 = primary cascade index
//   i1 = cascade to blend toward (equals i0 when there is no blend)
//   w1 = blend weight of i1 (0 means "use i0 only")
struct CascadeMix { uint i0; uint i1; float w1; };
// Pick the shadow cascade for a world-space point and, when the point lies close
// to that cascade's XY border, the next cascade to cross-fade into.
//
// The primary cascade is the first of the four whose light-space volume
// (|x| <= 1, |y| <= 1, 0 <= z <= 1) contains the point; if none does, cascade 3
// is used. A neighbor is only reported when it also contains the point.
CascadeMix computeCascadeMix(vec3 worldPos)
{
    vec4 worldPos4 = vec4(worldPos, 1.0);

    uint chosen = 3u;
    vec3 chosenNdc = vec3(0);
    for (uint c = 0u; c < 4u; ++c)
    {
        vec4 clipPos = sceneData.lightViewProjCascades[c] * worldPos4;
        vec3 p = clipPos.xyz / max(clipPos.w, 1e-6);
        bool inside = abs(p.x) <= 1.0 && abs(p.y) <= 1.0 && p.z >= 0.0 && p.z <= 1.0;
        if (inside)
        {
            chosen = c;
            chosenNdc = p;
            break;
        }
    }

    CascadeMix result;
    result.i0 = chosen;
    result.i1 = chosen;
    result.w1 = 0.0;

    // The last cascade has no coarser neighbor to fade into.
    if (!(chosen < 3u))
    {
        return result;
    }

    // 0 at the cascade center, 1 on its XY border.
    float borderProximity = max(abs(chosenNdc.x), abs(chosenNdc.y));
    // Ramp from 0 to 1 across the last SHADOW_BORDER_SMOOTH_NDC units before the border.
    float fadeStart = 1.0 - SHADOW_BORDER_SMOOTH_NDC;
    float ramp = clamp((borderProximity - fadeStart) / max(SHADOW_BORDER_SMOOTH_NDC, 1e-4), 0.0, 1.0);
    float fade = smoothstep(0.0, 1.0, ramp);
    if (!(fade > 0.0))
    {
        return result;
    }

    // Blend only when the next cascade really covers this point.
    uint next = chosen + 1u;
    vec4 clipNext = sceneData.lightViewProjCascades[next] * worldPos4;
    vec3 pn = clipNext.xyz / max(clipNext.w, 1e-6);
    bool nextCovers = (abs(pn.x) <= 1.0 && abs(pn.y) <= 1.0 && pn.z >= 0.0 && pn.z <= 1.0);
    if (nextCovers)
    {
        result.i1 = next;
        result.w1 = fade;
    }
    return result;
}
// Receiver-plane depth gradient: how shadow-space depth changes per unit of
// shadow-map UV, i.e. (dz/du, dz/dv), derived from screen-space derivatives of
// the shadow NDC position.
//   ndc      : shadow NDC position (not used by the computation)
//   dndc_dx  : d(ndc)/d(screen x)
//   dndc_dy  : d(ndc)/d(screen y)
// Returns vec2(0) when the screen->UV mapping is (near-)singular, so callers
// fall back to their constant/slope bias only.
// Reference given in the original source: Akenine-Möller et al., "Receiver Plane
// Depth Bias" (PCF-friendly). NOTE(review): attribution not verified here.
vec2 receiverPlaneDepthGradient(vec3 ndc, vec3 dndc_dx, vec3 dndc_dy)
{
    // uv = 0.5 * ndc.xy + 0.5, so UV derivatives are half the NDC ones.
    vec2 uvPerPixelX = 0.5 * dndc_dx.xy; // (du/dx, dv/dx)
    vec2 uvPerPixelY = 0.5 * dndc_dy.xy; // (du/dy, dv/dy)

    // Chain rule: [dz/dx; dz/dy] = M * [dz/du; dz/dv], where M has rows
    // (du/dx, dv/dx) and (du/dy, dv/dy). GLSL mat2 constructors take columns,
    // hence the argument order below.
    mat2 M = mat2(uvPerPixelX.x, uvPerPixelY.x,
                  uvPerPixelX.y, uvPerPixelY.y);

    float determinant = M[0][0] * M[1][1] - M[1][0] * M[0][1];
    if (abs(determinant) < 1e-8)
    {
        return vec2(0.0);
    }

    // Screen-space depth derivatives (dz/dx, dz/dy).
    vec2 depthPerPixel = vec2(dndc_dx.z, dndc_dy.z);

    // Hand-written 2x2 inverse (adjugate / determinant) instead of inverse().
    mat2 invM = (1.0 / determinant) * mat2( M[1][1], -M[0][1],
                                           -M[1][0],  M[0][0]);
    return invM * depthPerPixel;
}
// PCF-filtered shadow visibility of a point in cascade `ci`.
//   ci       : cascade index into sceneData.lightViewProjCascades and shadowTex
//   worldPos : receiver position (the caller passes the normal-offset position)
//   N, L     : surface normal and direction toward the light (for the slope bias)
// Returns a weighted fraction of lit taps in [0, 1]; 1.0 when the point's shadow
// UV falls outside the map.
//
// NOTE(review): `ci` can differ between neighboring fragments. Indexing a sampler
// array with a non-dynamically-uniform index formally requires nonuniformEXT
// (GL_EXT_nonuniform_qualifier) — confirm whether that is needed here.
float sampleCascadeShadow(uint ci, vec3 worldPos, vec3 N, vec3 L)
{
    mat4 lightMat = sceneData.lightViewProjCascades[ci];
    vec4 lclip = lightMat * vec4(worldPos, 1.0);
    // Same guarded divide as computeCascadeMix() so both agree on the NDC position.
    vec3 ndc = lclip.xyz / max(lclip.w, 1e-6);
    vec2 suv = ndc.xy * 0.5 + 0.5;

    // Take the screen-space derivatives BEFORE the early-out below: derivative
    // results are undefined once invocations of a quad have diverged.
    vec3 dndc_dx = dFdx(ndc);
    vec3 dndc_dy = dFdy(ndc);

    if (any(lessThan(suv, vec2(0.0))) || any(greaterThan(suv, vec2(1.0))))
        return 1.0;

    float current = clamp(ndc.z, 0.0, 1.0);

    // Slope-based tiny baseline bias (cheap safety net)
    float NoL = max(dot(N, L), 0.0);
    float slopeBias = max(0.0006 * (1.0 - NoL), SHADOW_MIN_BIAS);

    // Receiver-plane depth gradient in shadow UV space
    vec2 dz_duv = receiverPlaneDepthGradient(ndc, dndc_dx, dndc_dy);

    ivec2 dim = textureSize(shadowTex[ci], 0);
    vec2 texelSize = 1.0 / vec2(dim);

    // Kernel radius grows linearly with the cascade index (coarser cascades blur more).
    float baseRadius = SHADOW_PCF_BASE_RADIUS;
    float radius = mix(baseRadius, baseRadius + SHADOW_PCF_CASCADE_GAIN, float(ci) / 3.0);

    // Per-pixel random rotation of the sample pattern.
    float ang = hash12(suv * 4096.0) * 6.2831853;
    vec2 r = vec2(cos(ang), sin(ang));
    mat2 rot = mat2(r.x, -r.y, r.y, r.x);

    const int TAP_COUNT = 16;
    float visible = 0.0;
    float wsum = 0.0;
    for (int i = 0; i < TAP_COUNT; ++i)
    {
        vec2 pu = rot * POISSON_16[i];
        vec2 off = pu * radius * texelSize; // uv-space offset of this tap
        float pr = length(pu);
        // Taps closer to the kernel center weigh more.
        float w = 1.0 - smoothstep(0.0, 0.65, pr);

        // Explicit LOD 0: implicit-LOD texture() relies on derivatives, which are
        // undefined after the divergent early-out above, and a depth comparison
        // must read the base level anyway.
        float mapD = textureLod(shadowTex[ci], suv + off, 0.0).r;

        // Receiver-plane depth bias: conservative depth delta over this tap's offset
        // Approximate |Δz| ≈ |dz/du|*|Δu| + |dz/dv|*|Δv|
        float rpdb = dot(abs(dz_duv), abs(off)) * SHADOW_RPDB_SCALE;

        // step(edge, x) is 1 when x >= edge: the tap counts as lit when the biased
        // receiver depth is at or beyond the stored depth.
        // NOTE(review): confirm this direction matches the shadow pass depth convention.
        float vis = step(mapD, current + slopeBias + rpdb);
        visible += vis * w;
        wsum += w;
    }
    float visibility = (wsum > 0.0) ? (visible / wsum) : 1.0;
    return visibility;
}
// Shadow visibility in [0, 1] for a surface point, with cross-fade between
// neighboring cascades near cascade borders.
//   worldPos : surface position
//   N, L     : surface normal and direction toward the light
float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L)
{
    // Push the lookup position along the normal; the push grows from half to the
    // full SHADOW_NORMAL_OFFSET as the surface turns away from the light.
    float facing = max(dot(N, L), 0.0);
    float offsetScale = 0.5 + 0.5 * (1.0 - facing);
    vec3 biasedPos = worldPos + N * SHADOW_NORMAL_OFFSET * offsetScale;

    CascadeMix blend = computeCascadeMix(biasedPos);
    float primaryVis = sampleCascadeShadow(blend.i0, biasedPos, N, L);

    // No neighbor selected: the primary cascade alone decides.
    if (blend.w1 <= 0.0)
    {
        return primaryVis;
    }

    float neighborVis = sampleCascadeShadow(blend.i1, biasedPos, N, L);
    float weight = clamp(blend.w1, 0.0, 1.0);
    return mix(primaryVis, neighborVis, weight);
}
// Schlick approximation of Fresnel reflectance.
//   cosTheta : cosine of the angle between the half vector and the view direction
//   F0       : reflectance at normal incidence
// The pow() base is clamped to [0, 1]: cosTheta can round slightly above 1 for
// normalized inputs, and pow() with a negative base is undefined in GLSL.
vec3 fresnelSchlick(float cosTheta, vec3 F0)
{
    float oneMinusCos = clamp(1.0 - cosTheta, 0.0, 1.0);
    return F0 + (1.0 - F0) * pow(oneMinusCos, 5.0);
}
// GGX (Trowbridge-Reitz) normal distribution term.
//   N, H      : surface normal and half vector
//   roughness : perceptual roughness; squared to get alpha
// The denominator is floored at 0.001 to avoid division by zero.
float DistributionGGX(vec3 N, vec3 H, float roughness)
{
    float alpha = roughness * roughness;
    float alphaSq = alpha * alpha;
    float cosNH = max(dot(N, H), 0.0);
    float cosNHSq = cosNH * cosNH;
    float base = (cosNHSq * (alphaSq - 1.0) + 1.0);
    float normalizer = PI * base * base;
    return alphaSq / max(normalizer, 0.001);
}
// Schlick-GGX geometry term for a single direction.
//   NdotV     : clamped cosine between the normal and that direction
//   roughness : perceptual roughness, remapped to k = (roughness + 1)^2 / 8
// The denominator is floored at 0.001 to avoid division by zero.
float GeometrySchlickGGX(float NdotV, float roughness)
{
    float rPlusOne = (roughness + 1.0);
    float k = (rPlusOne * rPlusOne) / 8.0;
    float attenuation = NdotV * (1.0 - k) + k;
    return NdotV / max(attenuation, 0.001);
}
// Smith geometry term: product of the Schlick-GGX terms for the view direction
// and the light direction.
float GeometrySmith(vec3 N, vec3 V, vec3 L, float roughness)
{
    float cosNV = max(dot(N, V), 0.0);
    float cosNL = max(dot(N, L), 0.0);
    float viewTerm = GeometrySchlickGGX(cosNV, roughness);
    float lightTerm = GeometrySchlickGGX(cosNL, roughness);
    return lightTerm * viewTerm;
}
// Deferred lighting pass: reads the G-buffer at inUV, evaluates a Cook-Torrance
// BRDF for the single directional sun light with cascaded shadow visibility,
// adds a flat ambient term and writes the result to outColor.
void main(){
    // Pixels with w == 0 in the position buffer carry no surface: output zero.
    vec4 gPosition = texture(posTex, inUV);
    if (gPosition.w == 0.0)
    {
        outColor = vec4(0.0);
        return;
    }

    // Unpack surface attributes.
    vec3 pos = gPosition.xyz;
    vec4 gNormal = texture(normalTex, inUV);
    vec3 N = normalize(gNormal.xyz);
    float roughness = clamp(gNormal.w, 0.04, 1.0);
    vec4 gAlbedo = texture(albedoTex, inUV);
    vec3 albedo = gAlbedo.rgb;
    float metallic = clamp(gAlbedo.a, 0.0, 1.0);

    // Camera position is the translation column of the inverse view matrix.
    vec3 camPos = vec3(inverse(sceneData.view)[3]);
    vec3 V = normalize(camPos - pos);
    vec3 L = normalize(-sceneData.sunlightDirection.xyz);
    vec3 H = normalize(V + L);

    float NdotV = max(dot(N, V), 0.0);
    float NdotL = max(dot(N, L), 0.0);

    // Specular: D * G * F / (4 * NdotV * NdotL), denominator floored at 0.001.
    vec3 F0 = mix(vec3(0.04), albedo, metallic);
    vec3 F = fresnelSchlick(max(dot(H, V), 0.0), F0);
    float NDF = DistributionGGX(N, H, roughness);
    float G = GeometrySmith(N, V, L, roughness);
    vec3 specular = (NDF * G * F) / max(4.0 * NdotV * NdotL, 0.001);

    // Diffuse weight: what is not reflected, and none at all for pure metals.
    vec3 kD = (1.0 - F) * (1.0 - metallic);

    // Directional shadowing from the cascaded shadow maps.
    float visibility = calcShadowVisibility(pos, N, L);

    // Sun color scaled by its alpha channel (used as intensity), the cosine term and shadowing.
    vec3 irradiance = sceneData.sunlightColor.rgb * sceneData.sunlightColor.a * NdotL * visibility;
    vec3 color = (kD * albedo / PI + specular) * irradiance;
    color += albedo * sceneData.ambientColor.rgb;
    outColor = vec4(color, 1.0);
}

View File

@@ -6,6 +6,7 @@
#include "vk_device.h"
#include "core/vk_resource.h"
#include "frame_resources.h"
ComputeBinding ComputeBinding::uniformBuffer(uint32_t binding, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset)
{
@@ -354,9 +355,22 @@ void ComputeManager::dispatchInstance(VkCommandBuffer cmd, const std::string &in
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipeline());
updateDescriptorSet(it->second.descriptorSet, it->second.bindings);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getLayout(), 0, 1, &it->second.descriptorSet,
0, nullptr);
// Allocate a transient per-frame descriptor set to avoid updating a set
// that might still be in use by a previous in-flight frame.
VkDescriptorSet transientSet = context->currentFrame
? context->currentFrame->_frameDescriptors.allocate(context->getDevice()->device(), pipeline.descriptorLayout)
: VK_NULL_HANDLE;
if (transientSet == VK_NULL_HANDLE)
{
// Fallback to instance-owned set if per-frame allocator unavailable
updateDescriptorSet(it->second.descriptorSet, it->second.bindings);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getLayout(), 0, 1, &it->second.descriptorSet, 0, nullptr);
}
else
{
updateDescriptorSet(transientSet, it->second.bindings);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getLayout(), 0, 1, &transientSet, 0, nullptr);
}
if (dispatchInfo.pushConstants && dispatchInfo.pushConstantSize > 0)
{
@@ -459,9 +473,22 @@ bool ComputeManager::createPipeline(const std::string &name, const ComputePipeli
DescriptorLayoutBuilder layoutBuilder;
for (size_t i = 0; i < createInfo.descriptorTypes.size(); ++i)
{
layoutBuilder.add_binding(i, createInfo.descriptorTypes[i]);
layoutBuilder.add_binding(static_cast<uint32_t>(i), createInfo.descriptorTypes[i]);
}
computePipeline.descriptorLayout = layoutBuilder.build(context->getDevice()->device(), VK_SHADER_STAGE_COMPUTE_BIT);
// Mark all compute bindings as UPDATE_AFTER_BIND so we can update
// persistent instance descriptor sets while a previous frame is in-flight.
std::vector<VkDescriptorBindingFlags> bindingFlags(createInfo.descriptorTypes.size(),
VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT);
VkDescriptorSetLayoutBindingFlagsCreateInfo flagsCI{ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO };
flagsCI.bindingCount = static_cast<uint32_t>(bindingFlags.size());
flagsCI.pBindingFlags = bindingFlags.data();
computePipeline.descriptorLayout = layoutBuilder.build(
context->getDevice()->device(),
VK_SHADER_STAGE_COMPUTE_BIT,
&flagsCI,
VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
}
VkPipelineLayoutCreateInfo layoutInfo = vkinit::pipeline_layout_create_info();

View File

@@ -7,6 +7,22 @@ inline constexpr bool kUseValidationLayers = false;
inline constexpr bool kUseValidationLayers = true;
#endif
// VMA diagnostics (stats prints + JSON dumps + allocation naming)
// - Default: disabled to avoid noise and I/O at shutdown.
// - Enable at runtime by setting environment variable `VE_VMA_DEBUG=1`.
#include <cstdlib>
inline constexpr bool kEnableVmaDebugByDefault = false;

/// @brief Reports whether VMA diagnostics are enabled for this process.
///
/// Reads the `VE_VMA_DEBUG` environment variable on every call.
/// - Unset or empty: returns `kEnableVmaDebugByDefault`.
/// - `1`, `true`, `yes` (or the single letters `t` / `y`), compared
///   case-insensitively against the WHOLE value: returns true.
/// - Any other non-empty value (e.g. `0`, `no`, `10`, `tmp`): returns false.
///
/// @return true when diagnostics should run.
inline bool vmaDebugEnabled()
{
    const char *env = std::getenv("VE_VMA_DEBUG");
    if (env == nullptr || *env == '\0')
    {
        return kEnableVmaDebugByDefault;
    }

    // Lowercase spellings that switch diagnostics on.
    static constexpr const char *kTruthy[] = {"1", "true", "yes", "t", "y"};
    for (const char *word : kTruthy)
    {
        const char *v = env;
        const char *w = word;
        // ASCII-only case folding; the accepted words contain no other characters.
        while (*v != '\0' && *w != '\0')
        {
            const char lowered = (*v >= 'A' && *v <= 'Z') ? static_cast<char>(*v - 'A' + 'a') : *v;
            if (lowered != *w)
            {
                break;
            }
            ++v;
            ++w;
        }
        // Both strings must end together: a matching prefix alone is not enough.
        if (*v == '\0' && *w == '\0')
        {
            return true;
        }
    }
    return false;
}
// Shadow mapping configuration
inline constexpr int kShadowCascadeCount = 4;
// Maximum shadow distance for CSM in view-space units
@@ -22,9 +38,9 @@ inline constexpr float kShadowCascadeRadiusMargin = 10.0f;
inline constexpr float kShadowClipBaseRadius = 20.0f;
// When using dynamic pullback, compute it from the covered XY range of each level.
// pullback = max(kShadowClipPullbackMin, cover * kShadowClipPullbackFactor)
inline constexpr float kShadowClipPullbackFactor = 2.5f; // fraction of XY half-size behind center
inline constexpr float kShadowClipForwardFactor = 2.5f; // fraction of XY half-size in front of center for zFar
inline constexpr float kShadowClipPullbackMin = 160.0f; // lower bound on pullback so near levels dont collapse
inline constexpr float kShadowClipPullbackFactor = 1.5f; // fraction of XY half-size behind center
inline constexpr float kShadowClipForwardFactor = 1.5f; // fraction of XY half-size in front of center for zFar
inline constexpr float kShadowClipPullbackMin = 40.0f; // lower bound on pullback so near levels dont collapse
// Additional Z padding for the orthographic frustum along light direction
inline constexpr float kShadowClipZPadding = 40.0f;

View File

@@ -9,7 +9,9 @@ void DescriptorManager::init(DeviceManager *deviceManager)
{
DescriptorLayoutBuilder builder;
builder.add_binding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
_singleImageDescriptorLayout = builder.build(_deviceManager->device(), VK_SHADER_STAGE_FRAGMENT_BIT);
_singleImageDescriptorLayout = builder.build(
_deviceManager->device(), VK_SHADER_STAGE_FRAGMENT_BIT,
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
} {
DescriptorLayoutBuilder builder;
builder.add_binding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
@@ -19,7 +21,8 @@ void DescriptorManager::init(DeviceManager *deviceManager)
builder.add_binding(1, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
}
_gpuSceneDataDescriptorLayout = builder.build(
_deviceManager->device(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
_deviceManager->device(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
}
}

View File

@@ -77,10 +77,13 @@ void DescriptorWriter::write_image(int binding, VkImageView image, VkSampler sam
void DescriptorWriter::write_acceleration_structure(int binding, VkAccelerationStructureKHR as)
{
// Store the handle to ensure the pointer we give to Vulkan stays valid
VkAccelerationStructureKHR &storedAS = accelHandles.emplace_back(as);
VkWriteDescriptorSetAccelerationStructureKHR &acc = accelInfos.emplace_back(
VkWriteDescriptorSetAccelerationStructureKHR{ VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR });
acc.accelerationStructureCount = 1;
acc.pAccelerationStructures = &as;
acc.pAccelerationStructures = &storedAS;
VkWriteDescriptorSet write{ VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET };
write.dstBinding = binding;
@@ -95,6 +98,8 @@ void DescriptorWriter::clear()
imageInfos.clear();
writes.clear();
bufferInfos.clear();
accelInfos.clear();
accelHandles.clear();
}
void DescriptorWriter::update_set(VkDevice device, VkDescriptorSet set)
@@ -118,7 +123,10 @@ void DescriptorAllocator::init_pool(VkDevice device, uint32_t maxSets, std::span
}
VkDescriptorPoolCreateInfo pool_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
pool_info.flags = 0;
// Enable update-after-bind so descriptors used by previous frame can be
// safely rewritten (e.g., compute instances). It is valid to allocate
// non-update-after-bind sets from such a pool.
pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT;
pool_info.maxSets = maxSets;
pool_info.poolSizeCount = (uint32_t) poolSizes.size();
pool_info.pPoolSizes = poolSizes.data();
@@ -187,7 +195,8 @@ VkDescriptorPool DescriptorAllocatorGrowable::create_pool(VkDevice device, uint3
VkDescriptorPoolCreateInfo pool_info = {};
pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
pool_info.flags = 0;
// Use update-after-bind pools to support cross-frame rewrites.
pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT;
pool_info.maxSets = setCount;
pool_info.poolSizeCount = (uint32_t) poolSizes.size();
pool_info.pPoolSizes = poolSizes.data();

View File

@@ -20,6 +20,8 @@ struct DescriptorWriter
std::deque<VkDescriptorImageInfo> imageInfos;
std::deque<VkDescriptorBufferInfo> bufferInfos;
std::deque<VkWriteDescriptorSetAccelerationStructureKHR> accelInfos;
// Keep AS handles alive so pAccelerationStructures points to valid memory
std::deque<VkAccelerationStructureKHR> accelHandles;
std::vector<VkWriteDescriptorSet> writes;
void write_image(int binding, VkImageView image, VkSampler sampler, VkImageLayout layout, VkDescriptorType type);

View File

@@ -30,8 +30,16 @@ void DeviceManager::init_vulkan(SDL_Window *window)
features.synchronization2 = true;
VkPhysicalDeviceVulkan12Features features12{.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
features12.bufferDeviceAddress = true;
features12.descriptorIndexing = true;
features12.bufferDeviceAddress = VK_TRUE;
features12.descriptorIndexing = VK_TRUE;
// Enable update-after-bind related toggles for graphics/compute descriptors
features12.descriptorBindingPartiallyBound = VK_TRUE;
features12.descriptorBindingUpdateUnusedWhilePending = VK_TRUE;
features12.runtimeDescriptorArray = VK_TRUE;
features12.descriptorBindingUniformBufferUpdateAfterBind = VK_TRUE;
features12.descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE;
features12.descriptorBindingSampledImageUpdateAfterBind = VK_TRUE;
features12.descriptorBindingStorageImageUpdateAfterBind = VK_TRUE;
//use vkbootstrap to select a gpu.
//We want a gpu that can write to the SDL surface and supports vulkan 1.3
@@ -72,14 +80,16 @@ void DeviceManager::init_vulkan(SDL_Window *window)
//create the final vulkan device
vkb::DeviceBuilder deviceBuilder{physicalDevice};
// Enable ray query + accel struct features in device create pNext if supported
// Ray features are optional and enabled only if supported on the chosen GPU
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR };
VkPhysicalDeviceRayQueryFeaturesKHR rayqReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR };
if (_rayQuerySupported && _accelStructSupported)
{
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR };
accelReq.accelerationStructure = VK_TRUE;
VkPhysicalDeviceRayQueryFeaturesKHR rayqReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR };
rayqReq.pNext = &accelReq;
rayqReq.rayQuery = VK_TRUE;
rayqReq.pNext = &accelReq;
}
if (_rayQuerySupported && _accelStructSupported) {
deviceBuilder.add_pNext(&rayqReq);
}
@@ -111,6 +121,18 @@ void DeviceManager::init_vulkan(SDL_Window *window)
void DeviceManager::cleanup()
{
// Optional VMA stats print
if (_allocator && vmaDebugEnabled())
{
VmaTotalStatistics stats{};
vmaCalculateStatistics(_allocator, &stats);
const VmaStatistics& s = stats.total.statistics;
fmt::print("[VMA] Blocks: {} | Allocations: {} | BlockBytes: {} | AllocationBytes: {}\n",
(size_t)s.blockCount,
(size_t)s.allocationCount,
(unsigned long long)s.blockBytes,
(unsigned long long)s.allocationBytes);
}
vkDestroySurfaceKHR(_instance, _surface, nullptr);
_deletionQueue.flush();
vkDestroyDevice(_device, nullptr);

View File

@@ -53,6 +53,46 @@
VulkanEngine *loadedEngine = nullptr;
/// @brief Prints a one-line summary of the VMA allocator's total statistics.
///
/// Does nothing unless VMA diagnostics are enabled (see vmaDebugEnabled()) and
/// both the device manager and its allocator are available.
///
/// @param dev Device manager that owns the allocator; may be null.
/// @param tag Label printed in the `[VMA][tag]` prefix to identify the call site.
static void print_vma_stats(DeviceManager* dev, const char* tag)
{
    if (!vmaDebugEnabled() || !dev)
    {
        return;
    }

    VmaAllocator allocator = dev->allocator();
    if (!allocator)
    {
        return;
    }

    VmaTotalStatistics totals{};
    vmaCalculateStatistics(allocator, &totals);
    const VmaStatistics &summary = totals.total.statistics;

    const auto blocks = static_cast<size_t>(summary.blockCount);
    const auto allocations = static_cast<size_t>(summary.allocationCount);
    const auto blockBytes = static_cast<unsigned long long>(summary.blockBytes);
    const auto allocationBytes = static_cast<unsigned long long>(summary.allocationBytes);

    fmt::print("[VMA][{}] Blocks:{} Allocs:{} BlockBytes:{} AllocBytes:{}\n",
               tag, blocks, allocations, blockBytes, allocationBytes);
}
/// @brief Writes the VMA allocator's detailed statistics string to `vma_<tag>.json`.
///
/// Does nothing unless VMA diagnostics are enabled (see vmaDebugEnabled()) and
/// both the device manager and its allocator are available. The file name is
/// relative, so the file lands in the process's current working directory.
/// Failures to open or fully write the file are reported on stdout instead of
/// being reported as success.
///
/// @param dev Device manager that owns the allocator; may be null.
/// @param tag Suffix used in the file name; a null tag is written as "unknown".
static void dump_vma_json(DeviceManager* dev, const char* tag)
{
    if (!vmaDebugEnabled()) return;
    if (!dev) return;
    VmaAllocator alloc = dev->allocator();
    if (!alloc) return;

    char* json = nullptr;
    vmaBuildStatsString(alloc, &json, VK_TRUE);
    if (!json) return;

    // Building a std::string from a null pointer is undefined behavior; guard it.
    const std::string fname = std::string("vma_") + (tag ? tag : "unknown") + ".json";

    FILE* f = fopen(fname.c_str(), "wb");
    if (f)
    {
        const size_t length = strlen(json);
        const size_t written = fwrite(json, 1, length, f);
        // fclose flushes buffered data, so its result is part of "did the write succeed".
        const bool closedOk = (fclose(f) == 0);
        if (written == length && closedOk)
        {
            fmt::print("[VMA] Wrote {}\n", fname);
        }
        else
        {
            fmt::print("[VMA] Failed to write {} ({} of {} bytes)\n", fname, written, length);
        }
    }
    else
    {
        fmt::print("[VMA] Failed to open {} for writing\n", fname);
    }

    // The stats string is owned by VMA and must be released through it.
    vmaFreeStatsString(alloc, json);
}
void VulkanEngine::init()
{
// We initialize SDL and create a window with it.
@@ -150,7 +190,7 @@ void VulkanEngine::init()
auto imguiPass = std::make_unique<ImGuiPass>();
_renderPassManager->setImGuiPass(std::move(imguiPass));
const std::string structurePath = _assetManager->modelPath("police_office.glb");
const std::string structurePath = _assetManager->modelPath("seoul_high.glb");
const auto structureFile = _assetManager->loadGLTF(structurePath);
assert(structureFile.has_value());
@@ -233,7 +273,11 @@ void VulkanEngine::cleanup()
{
vkDeviceWaitIdle(_deviceManager->device());
print_vma_stats(_deviceManager.get(), "begin");
_sceneManager->cleanup();
print_vma_stats(_deviceManager.get(), "after SceneManager");
dump_vma_json(_deviceManager.get(), "after_SceneManager");
if (_isInitialized)
{
@@ -253,24 +297,53 @@ void VulkanEngine::cleanup()
metalRoughMaterial.clear_resources(_deviceManager->device());
_mainDeletionQueue.flush();
print_vma_stats(_deviceManager.get(), "after MainDQ flush");
dump_vma_json(_deviceManager.get(), "after_MainDQ");
_renderPassManager->cleanup();
print_vma_stats(_deviceManager.get(), "after RenderPassManager");
dump_vma_json(_deviceManager.get(), "after_RenderPassManager");
_pipelineManager->cleanup();
print_vma_stats(_deviceManager.get(), "after PipelineManager");
dump_vma_json(_deviceManager.get(), "after_PipelineManager");
compute.cleanup();
print_vma_stats(_deviceManager.get(), "after Compute");
dump_vma_json(_deviceManager.get(), "after_Compute");
_swapchainManager->cleanup();
print_vma_stats(_deviceManager.get(), "after Swapchain");
dump_vma_json(_deviceManager.get(), "after_Swapchain");
if (_assetManager) _assetManager->cleanup();
print_vma_stats(_deviceManager.get(), "after AssetManager");
dump_vma_json(_deviceManager.get(), "after_AssetManager");
// Ensure ray tracing resources (BLAS/TLAS/instance buffers) are freed before VMA is destroyed
if (_rayManager) { _rayManager->cleanup(); }
print_vma_stats(_deviceManager.get(), "after RTManager");
dump_vma_json(_deviceManager.get(), "after_RTManager");
_resourceManager->cleanup();
print_vma_stats(_deviceManager.get(), "after ResourceManager");
dump_vma_json(_deviceManager.get(), "after_ResourceManager");
_samplerManager->cleanup();
_descriptorManager->cleanup();
print_vma_stats(_deviceManager.get(), "after Samplers+Descriptors");
dump_vma_json(_deviceManager.get(), "after_Samplers_Descriptors");
_context->descriptors->destroy_pools(_deviceManager->device());
// Extra safety: flush frame deletion queues once more before destroying VMA
for (int i = 0; i < FRAME_OVERLAP; i++)
{
_frames[i]._deletionQueue.flush();
}
print_vma_stats(_deviceManager.get(), "before DeviceManager");
dump_vma_json(_deviceManager.get(), "before_DeviceManager");
_deviceManager->cleanup();
SDL_DestroyWindow(_window);
@@ -280,11 +353,6 @@ void VulkanEngine::cleanup()
void VulkanEngine::draw()
{
_sceneManager->update_scene();
// Build or update TLAS for current frame if RT mode enabled (1 or 2)
if (_rayManager && _context->shadowSettings.mode != 0u)
{
_rayManager->buildTLASFromDrawContext(_context->getMainDrawContext());
}
//> frame_clear
//wait until the gpu has finished rendering the last frame. Timeout of 1 second
VK_CHECK(vkWaitForFences(_deviceManager->device(), 1, &get_current_frame()._renderFence, true, 1000000000));
@@ -319,6 +387,12 @@ void VulkanEngine::draw()
//now that we are sure that the commands finished executing, we can safely reset the command buffer to begin recording again.
VK_CHECK(vkResetCommandBuffer(get_current_frame()._mainCommandBuffer, 0));
// Build or update TLAS for current frame now that the previous frame is idle
if (_rayManager && _context->shadowSettings.mode != 0u)
{
_rayManager->buildTLASFromDrawContext(_context->getMainDrawContext(), get_current_frame()._deletionQueue);
}
//naming it cmd for shorter writing
VkCommandBuffer cmd = get_current_frame()._mainCommandBuffer;

View File

@@ -21,6 +21,12 @@ void RayTracingManager::init(DeviceManager *dev, ResourceManager *res)
vkGetDeviceProcAddr(_device->device(), "vkCmdBuildAccelerationStructuresKHR"));
_vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(
vkGetDeviceProcAddr(_device->device(), "vkGetAccelerationStructureDeviceAddressKHR"));
// Query AS properties for scratch alignment
VkPhysicalDeviceAccelerationStructurePropertiesKHR asProps{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR };
VkPhysicalDeviceProperties2 props2{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, &asProps };
vkGetPhysicalDeviceProperties2(_device->physicalDevice(), &props2);
_minScratchAlignment = std::max<VkDeviceSize>(asProps.minAccelerationStructureScratchOffsetAlignment, 256);
}
void RayTracingManager::cleanup()
@@ -150,11 +156,15 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
asci.size = sizes.accelerationStructureSize;
VK_CHECK(_vkCreateAccelerationStructureKHR(_device->device(), &asci, nullptr, &blas.handle));
AllocatedBuffer scratch = _resources->create_buffer(sizes.buildScratchSize,
// Allocate scratch with padding to satisfy alignment requirements
const VkDeviceSize align = _minScratchAlignment;
const VkDeviceSize padded = sizes.buildScratchSize + (align - 1);
AllocatedBuffer scratch = _resources->create_buffer(padded,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VMA_MEMORY_USAGE_GPU_ONLY);
VkDeviceAddress scratchAddr = get_buffer_address(_device->device(), scratch.buffer);
VkDeviceAddress scratchBase = get_buffer_address(_device->device(), scratch.buffer);
VkDeviceAddress scratchAddr = (scratchBase + (align - 1)) & ~VkDeviceAddress(align - 1);
buildInfo.dstAccelerationStructure = blas.handle;
buildInfo.scratchData.deviceAddress = scratchAddr;
@@ -178,18 +188,20 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
return blas;
}
void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize /*requiredScratch*/)
void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize /*requiredScratch*/, DeletionQueue& dq)
{
// Simple: recreate TLAS storage if size grows
if (_tlas.handle)
// Recreate TLAS storage if size grows. Defer destruction to the frame DQ to
// avoid freeing while referenced by in-flight frames.
if (_tlas.handle || _tlas.storage.buffer)
{
_vkDestroyAccelerationStructureKHR(_device->device(), _tlas.handle, nullptr);
_tlas.handle = VK_NULL_HANDLE;
}
if (_tlas.storage.buffer)
{
_resources->destroy_buffer(_tlas.storage);
_tlas.storage = {};
AccelStructureHandle old = _tlas;
dq.push_function([this, old]() {
if (old.handle)
_vkDestroyAccelerationStructureKHR(_device->device(), old.handle, nullptr);
if (old.storage.buffer)
_resources->destroy_buffer(old.storage);
});
_tlas = {};
}
_tlas.storage = _resources->create_buffer(requiredASSize,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
@@ -203,7 +215,7 @@ void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDevic
VK_CHECK(_vkCreateAccelerationStructureKHR(_device->device(), &asci, nullptr, &_tlas.handle));
}
VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const DrawContext &dc)
VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const DrawContext &dc, DeletionQueue& dq)
{
// Collect instances; one per render object (opaque only).
std::vector<VkAccelerationStructureInstanceKHR> instances;
@@ -239,8 +251,19 @@ VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const Dra
if (instances.empty())
{
// nothing to build
return _tlas.handle;
// No instances this frame: defer TLAS destruction to avoid racing with previous frames
if (_tlas.handle || _tlas.storage.buffer)
{
AccelStructureHandle old = _tlas;
dq.push_function([this, old]() {
if (old.handle)
_vkDestroyAccelerationStructureKHR(_device->device(), old.handle, nullptr);
if (old.storage.buffer)
_resources->destroy_buffer(old.storage);
});
_tlas = {};
}
return VK_NULL_HANDLE;
}
// Ensure instance buffer capacity
@@ -293,15 +316,18 @@ VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const Dra
_vkGetAccelerationStructureBuildSizesKHR(_device->device(), VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
&buildInfo, &primCount, &sizes);
ensure_tlas_storage(sizes.accelerationStructureSize, sizes.buildScratchSize);
ensure_tlas_storage(sizes.accelerationStructureSize, sizes.buildScratchSize, dq);
buildInfo.dstAccelerationStructure = _tlas.handle;
AllocatedBuffer scratch = _resources->create_buffer(sizes.buildScratchSize,
const VkDeviceSize align2 = _minScratchAlignment;
const VkDeviceSize padded2 = sizes.buildScratchSize + (align2 - 1);
AllocatedBuffer scratch = _resources->create_buffer(padded2,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VMA_MEMORY_USAGE_GPU_ONLY);
VkDeviceAddress scratchAddr = get_buffer_address(_device->device(), scratch.buffer);
buildInfo.scratchData.deviceAddress = scratchAddr;
VkDeviceAddress scratchBase2 = get_buffer_address(_device->device(), scratch.buffer);
VkDeviceAddress scratchAddr2 = (scratchBase2 + (align2 - 1)) & ~VkDeviceAddress(align2 - 1);
buildInfo.scratchData.deviceAddress = scratchAddr2;
VkAccelerationStructureBuildRangeInfoKHR range{};
range.primitiveCount = primCount;

View File

@@ -25,8 +25,9 @@ public:
// Build (or get) BLAS for a mesh. Safe to call multiple times.
AccelStructureHandle getOrBuildBLAS(const std::shared_ptr<MeshAsset>& mesh);
// Rebuild TLAS from current draw context; returns TLAS handle (or null if unavailable)
VkAccelerationStructureKHR buildTLASFromDrawContext(const DrawContext& dc);
// Rebuild TLAS from current draw context; returns TLAS handle (or null if unavailable)
// Destruction of previous TLAS resources is deferred via the provided frame deletion queue
VkAccelerationStructureKHR buildTLASFromDrawContext(const DrawContext& dc, DeletionQueue& frameDQ);
VkAccelerationStructureKHR tlas() const { return _tlas.handle; }
VkDeviceAddress tlasAddress() const { return _tlas.deviceAddress; }
@@ -34,7 +35,7 @@ public:
// Safe to call even if no BLAS exists for the buffer.
void removeBLASForBuffer(VkBuffer vertexBuffer);
private:
private:
// function pointers (resolved on init)
PFN_vkCreateAccelerationStructureKHR _vkCreateAccelerationStructureKHR{};
PFN_vkDestroyAccelerationStructureKHR _vkDestroyAccelerationStructureKHR{};
@@ -42,17 +43,20 @@ public:
PFN_vkCmdBuildAccelerationStructuresKHR _vkCmdBuildAccelerationStructuresKHR{};
PFN_vkGetAccelerationStructureDeviceAddressKHR _vkGetAccelerationStructureDeviceAddressKHR{};
DeviceManager* _device{nullptr};
ResourceManager* _resources{nullptr};
DeviceManager* _device{nullptr};
ResourceManager* _resources{nullptr};
// BLAS cache by vertex buffer handle
std::unordered_map<VkBuffer, AccelStructureHandle> _blasByVB;
// TLAS + scratch / instance buffer (rebuilt per frame)
AccelStructureHandle _tlas{};
AllocatedBuffer _tlasInstanceBuffer{};
size_t _tlasInstanceCapacity{0};
// TLAS + scratch / instance buffer (rebuilt per frame)
AccelStructureHandle _tlas{};
AllocatedBuffer _tlasInstanceBuffer{};
size_t _tlasInstanceCapacity{0};
// Properties
VkDeviceSize _minScratchAlignment{256};
void ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize requiredScratch);
};
void ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize requiredScratch, DeletionQueue& frameDQ);
};

View File

@@ -809,13 +809,16 @@ void RenderGraph::add_present_chain(RGImageHandle sourceDraw,
// Describes the swapchain's draw image (image, view, format, draw extent) as an
// imported render-graph image named "drawImage" and registers it through
// import_image(), returning the resulting handle.
RGImageHandle RenderGraph::import_draw_image()
{
RGImportedImageDesc d{};
d.name = "drawImage";
d.image = _context->getSwapchain()->drawImage().image;
d.imageView = _context->getSwapchain()->drawImage().imageView;
d.format = _context->getSwapchain()->drawImage().imageFormat;
d.extent = _context->getDrawExtent();
d.currentLayout = VK_IMAGE_LAYOUT_GENERAL;
// NOTE(review): this listing appears to carry both sides of the change. The
// lines above end with a GENERAL starting layout, the lines below repeat the
// same setup with UNDEFINED; presumably only the lower set is the current
// code. Confirm against the repository before relying on either.
RGImportedImageDesc d{};
d.name = "drawImage";
d.image = _context->getSwapchain()->drawImage().image;
d.imageView = _context->getSwapchain()->drawImage().imageView;
d.format = _context->getSwapchain()->drawImage().imageFormat;
d.extent = _context->getDrawExtent();
// Treat layout as unknown at frame start to force an explicit barrier
// into the first declared usage (compute write / color attach). This
// avoids mismatches when the previous frame ended in a different layout.
d.currentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
return import_image(d);
}
@@ -942,8 +945,10 @@ RGImageHandle RenderGraph::import_swapchain_image(uint32_t index)
d.imageView = views[index];
d.format = _context->getSwapchain()->swapchainImageFormat();
d.extent = _context->getSwapchain()->swapchainExtent();
d.currentLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
return import_image(d);
// On first use after swapchain creation, images are in UNDEFINED layout.
// Start from UNDEFINED so the graph inserts the necessary transition.
d.currentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
return import_image(d);
}
void RenderGraph::resolve_timings()
@@ -960,7 +965,7 @@ void RenderGraph::resolve_timings()
_context->getDevice()->device(), _timestampPool,
0, queryCount,
sizeof(uint64_t) * results.size(), results.data(), sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT);
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
// Convert ticks to ms
VkPhysicalDeviceProperties props{};
vkGetPhysicalDeviceProperties(_context->getDevice()->physicalDevice(), &props);
@@ -983,6 +988,8 @@ void RenderGraph::resolve_timings()
}
}
// Ensure any pending work that might still reference the pool is complete
vkQueueWaitIdle(_context->getDevice()->graphicsQueue());
vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr);
_timestampPool = VK_NULL_HANDLE;
}

View File

@@ -1,8 +1,11 @@
#include <render/rg_resources.h>
#include <core/engine_context.h>
#include <core/vk_resource.h>
#include <vk_mem_alloc.h>
#include <core/config.h>
#include "frame_resources.h"
#include "vk_device.h"
void RGResourceRegistry::reset()
{
@@ -53,7 +56,13 @@ RGImageHandle RGResourceRegistry::add_transient(const RGImageDesc& d)
rec.creationUsage = d.usage;
VkExtent3D size{ d.extent.width, d.extent.height, 1 };
rec.allocation = _ctx->getResources()->create_image(size, d.format, d.usage);
rec.allocation = _ctx->getResources()->create_image(size, d.format, d.usage);
// Name the allocation for diagnostics (optional)
if (vmaDebugEnabled() && _ctx && _ctx->getDevice())
{
std::string nm = std::string("rg.image:") + d.name;
vmaSetAllocationName(_ctx->getDevice()->allocator(), rec.allocation.allocation, nm.c_str());
}
rec.image = rec.allocation.image;
rec.imageView = rec.allocation.imageView;

View File

@@ -21,7 +21,8 @@ void GLTFMetallic_Roughness::build_pipelines(VulkanEngine *engine)
layoutBuilder.add_binding(2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
materialLayout = layoutBuilder.build(engine->_deviceManager->device(),
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
VkDescriptorSetLayout layouts[] = {
engine->_descriptorManager->gpuSceneDataLayout(),

View File

@@ -94,6 +94,6 @@ void BackgroundPass::cleanup()
_context->pipelines->destroyComputePipeline("gradient");
_context->pipelines->destroyComputePipeline("sky");
}
fmt::print("RenderPassManager::cleanup()\n");
fmt::print("BackgroundPass::cleanup()\n");
_backgroundEffects.clear();
}

View File

@@ -30,7 +30,9 @@ void LightingPass::init(EngineContext *context)
builder.add_binding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
builder.add_binding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
builder.add_binding(2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
_gBufferInputDescriptorLayout = builder.build(_context->getDevice()->device(), VK_SHADER_STAGE_FRAGMENT_BIT);
_gBufferInputDescriptorLayout = builder.build(
_context->getDevice()->device(), VK_SHADER_STAGE_FRAGMENT_BIT,
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
}
// Allocate and write GBuffer descriptor set
@@ -51,21 +53,22 @@ void LightingPass::init(EngineContext *context)
{
DescriptorLayoutBuilder builder;
builder.add_binding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, kShadowCascadeCount);
_shadowDescriptorLayout = builder.build(_context->getDevice()->device(), VK_SHADER_STAGE_FRAGMENT_BIT);
_shadowDescriptorLayout = builder.build(
_context->getDevice()->device(), VK_SHADER_STAGE_FRAGMENT_BIT,
nullptr, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT);
}
// Build lighting pipeline through PipelineManager
// Build lighting pipelines (RT and non-RT) through PipelineManager
VkDescriptorSetLayout layouts[] = {
_context->getDescriptorLayouts()->gpuSceneDataLayout(),
_gBufferInputDescriptorLayout,
_shadowDescriptorLayout
};
GraphicsPipelineCreateInfo info{};
info.vertexShaderPath = _context->getAssets()->shaderPath("fullscreen.vert.spv");
info.fragmentShaderPath = _context->getAssets()->shaderPath("deferred_lighting.frag.spv");
info.setLayouts.assign(std::begin(layouts), std::end(layouts));
info.configure = [this](PipelineBuilder &b) {
GraphicsPipelineCreateInfo baseInfo{};
baseInfo.vertexShaderPath = _context->getAssets()->shaderPath("fullscreen.vert.spv");
baseInfo.setLayouts.assign(std::begin(layouts), std::end(layouts));
baseInfo.configure = [this](PipelineBuilder &b) {
b.set_input_topology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
b.set_polygon_mode(VK_POLYGON_MODE_FILL);
b.set_cull_mode(VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE);
@@ -74,13 +77,16 @@ void LightingPass::init(EngineContext *context)
b.disable_depthtest();
b.set_color_attachment_format(_context->getSwapchain()->drawImage().imageFormat);
};
_context->pipelines->createGraphicsPipeline("deferred_lighting", info);
// fetch the handles so current frame uses latest versions
MaterialPipeline mp{};
_context->pipelines->getMaterialPipeline("deferred_lighting", mp);
_pipeline = mp.pipeline;
_pipelineLayout = mp.layout;
// Non-RT variant (no TLAS required)
auto infoNoRT = baseInfo;
infoNoRT.fragmentShaderPath = _context->getAssets()->shaderPath("deferred_lighting_nort.frag.spv");
_context->pipelines->createGraphicsPipeline("deferred_lighting.nort", infoNoRT);
// RT variant (requires GL_EXT_ray_query and TLAS bound at set=0,binding=1)
auto infoRT = baseInfo;
infoRT.fragmentShaderPath = _context->getAssets()->shaderPath("deferred_lighting.frag.spv");
_context->pipelines->createGraphicsPipeline("deferred_lighting.rt", infoRT);
_deletionQueue.push_function([&]() {
// Pipelines are owned by PipelineManager; only destroy our local descriptor set layout
@@ -145,8 +151,20 @@ void LightingPass::draw_lighting(VkCommandBuffer cmd,
VkImageView drawView = resources.image_view(drawHandle);
if (drawView == VK_NULL_HANDLE) return;
// Re-fetch pipeline in case it was hot-reloaded
pipelineManager->getGraphics("deferred_lighting", _pipeline, _pipelineLayout);
// Choose RT only if TLAS is valid; otherwise fall back to non-RT.
const bool haveRTFeatures = ctxLocal->getDevice()->supportsAccelerationStructure();
const VkAccelerationStructureKHR tlas = (ctxLocal->ray ? ctxLocal->ray->tlas() : VK_NULL_HANDLE);
const VkDeviceAddress tlasAddr = (ctxLocal->ray ? ctxLocal->ray->tlasAddress() : 0);
const bool useRT = haveRTFeatures && (ctxLocal->shadowSettings.mode != 0u) && (tlas != VK_NULL_HANDLE) && (tlasAddr != 0);
const char* pipeName = useRT ? "deferred_lighting.rt" : "deferred_lighting.nort";
if (!pipelineManager->getGraphics(pipeName, _pipeline, _pipelineLayout))
{
// Try the other variant as a fallback
const char* fallback = useRT ? "deferred_lighting.nort" : "deferred_lighting.rt";
if (!pipelineManager->getGraphics(fallback, _pipeline, _pipelineLayout))
return; // Neither pipeline is ready
}
// Dynamic rendering is handled by the RenderGraph using the declared draw attachment.
@@ -168,14 +186,10 @@ void LightingPass::draw_lighting(VkCommandBuffer cmd,
deviceManager->device(), descriptorLayouts->gpuSceneDataLayout());
DescriptorWriter writer;
writer.write_buffer(0, gpuSceneDataBuffer.buffer, sizeof(GPUSceneData), 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
// If TLAS available and feature enabled, bind it at (set=0,binding=1)
if (ctxLocal->ray && ctxLocal->getDevice()->supportsAccelerationStructure() && ctxLocal->shadowSettings.mode != 0u)
// Only write TLAS when using the RT pipeline and we have a valid TLAS
if (useRT)
{
VkAccelerationStructureKHR tlas = ctxLocal->ray->tlas();
if (tlas != VK_NULL_HANDLE)
{
writer.write_acceleration_structure(1, tlas);
}
writer.write_acceleration_structure(1, tlas);
}
writer.update_set(deviceManager->device(), globalDescriptor);

View File

@@ -47,6 +47,7 @@ void ShadowPass::init(EngineContext *context)
b.set_multisampling_none();
b.disable_blending();
// Keep reverse-Z convention for shadow maps to match engine depth usage
b.enable_depthtest(true, VK_COMPARE_OP_GREATER_OR_EQUAL);
b.set_depth_format(VK_FORMAT_D32_SFLOAT);

View File

@@ -6,6 +6,7 @@
#include "render/vk_materials.h"
#include "core/vk_initializers.h"
#include "core/vk_types.h"
#include "core/config.h"
#include <glm/gtx/quaternion.hpp>
#include <fastgltf/glm_element_traits.hpp>
@@ -42,6 +43,9 @@ std::optional<AllocatedImage> load_image(VulkanEngine *engine, fastgltf::Asset &
VkFormat fmt = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
newImage = engine->_resourceManager->create_image(
data, imagesize, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, false);
// Name the allocation for diagnostics
if (vmaDebugEnabled())
vmaSetAllocationName(engine->_deviceManager->allocator(), newImage.allocation, path.c_str());
stbi_image_free(data);
}
@@ -59,6 +63,8 @@ std::optional<AllocatedImage> load_image(VulkanEngine *engine, fastgltf::Asset &
VkFormat fmt = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
newImage = engine->_resourceManager->create_image(
data, imagesize, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, false);
if (vmaDebugEnabled())
vmaSetAllocationName(engine->_deviceManager->allocator(), newImage.allocation, "gltf.vector.image");
stbi_image_free(data);
}
@@ -86,8 +92,10 @@ std::optional<AllocatedImage> load_image(VulkanEngine *engine, fastgltf::Asset &
imagesize.depth = 1;
VkFormat fmt = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
newImage = engine->_resourceManager->create_image(
newImage = engine->_resourceManager->create_image(
data, imagesize, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, false);
if (vmaDebugEnabled())
vmaSetAllocationName(engine->_deviceManager->allocator(), newImage.allocation, "gltf.bufferview.image");
stbi_image_free(data);
}
@@ -256,22 +264,33 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
//< load_arrays
// load all textures
for (fastgltf::Image &image: gltf.images)
for (size_t i = 0; i < gltf.images.size(); ++i)
{
fastgltf::Image &image = gltf.images[i];
// Default-load GLTF images as linear; baseColor is reloaded as sRGB when bound
std::optional<AllocatedImage> img = load_image(engine, gltf, image, false);
if (img.has_value())
{
images.push_back(*img);
file.images[image.name.c_str()] = *img;
// Use a unique, stable key so every allocation is tracked and later freed.
std::string key = image.name.empty() ? (std::string("gltf.image.") + std::to_string(i))
: std::string(image.name.c_str());
// Avoid accidental collisions from duplicate names
int suffix = 1;
while (file.images.find(key) != file.images.end())
{
key = (image.name.empty() ? std::string("gltf.image.") + std::to_string(i)
: std::string(image.name.c_str())) + std::string("#") + std::to_string(suffix++);
}
file.images[key] = *img;
}
else
{
// Loading failed: fill this slot with the engine's error checkerboard image
// so the slot is still populated and the rest of the load can continue.
images.push_back(engine->_errorCheckerboardImage);
std::cout << "gltf failed to load texture " << image.name << std::endl;
std::cout << "gltf failed to load texture index " << i << " (name='" << image.name << "')" << std::endl;
}
}

View File

@@ -198,6 +198,13 @@ std::shared_ptr<LoadedGLTF> SceneManager::getScene(const std::string &name)
// Tears down everything the scene manager holds, in a fixed order: dynamic
// mesh and GLTF instances are cleared first, then the loaded-scene and
// loaded-node containers are emptied.
void SceneManager::cleanup()
{
// Explicitly clear dynamic instances first to drop any extra shared_ptrs
// that could keep GPU resources alive.
clearMeshInstances();
clearGLTFInstances();
// Drop our references to GLTF scenes. Their destructors call clearAll()
// exactly once to release GPU resources.
loadedScenes.clear();
// Finally empty the loaded-node container as well.
loadedNodes.clear();
}