Object creation & deletion completed (RT error)

This commit is contained in:
2025-11-22 22:33:19 +09:00
parent b023907df8
commit cbcaf23df1
5 changed files with 101 additions and 62 deletions

View File

@@ -15,6 +15,7 @@
#include <fastgltf/parser.hpp> #include <fastgltf/parser.hpp>
#include <fastgltf/util.hpp> #include <fastgltf/util.hpp>
#include <fastgltf/tools.hpp> #include <fastgltf/tools.hpp>
#include <fmt/core.h>
using std::filesystem::path; using std::filesystem::path;
@@ -83,11 +84,28 @@ std::optional<std::shared_ptr<LoadedGLTF> > AssetManager::loadGLTF(std::string_v
if (auto it = _gltfCacheByPath.find(key); it != _gltfCacheByPath.end()) if (auto it = _gltfCacheByPath.find(key); it != _gltfCacheByPath.end())
{ {
if (auto sp = it->second.lock()) return sp; if (auto sp = it->second.lock())
{
fmt::println("[AssetManager] loadGLTF cache hit key='{}' path='{}' ptr={}", key, resolved,
static_cast<const void *>(sp.get()));
return sp;
}
fmt::println("[AssetManager] loadGLTF cache expired key='{}' path='{}' (reloading)", key, resolved);
} }
auto loaded = loadGltf(_engine, resolved); auto loaded = loadGltf(_engine, resolved);
if (!loaded.has_value()) return {}; if (!loaded.has_value()) return {};
if (loaded.value())
{
fmt::println("[AssetManager] loadGLTF loaded new scene key='{}' path='{}' ptr={}", key, resolved,
static_cast<const void *>(loaded.value().get()));
}
else
{
fmt::println("[AssetManager] loadGLTF got empty scene for key='{}' path='{}'", key, resolved);
}
_gltfCacheByPath[key] = loaded.value(); _gltfCacheByPath[key] = loaded.value();
return loaded; return loaded;
} }
@@ -519,11 +537,9 @@ std::shared_ptr<MeshAsset> AssetManager::createMesh(const std::string &name,
auto mesh = std::make_shared<MeshAsset>(); auto mesh = std::make_shared<MeshAsset>();
mesh->name = name; mesh->name = name;
mesh->meshBuffers = _engine->_resourceManager->uploadMesh(indices, vertices); mesh->meshBuffers = _engine->_resourceManager->uploadMesh(indices, vertices);
// Build BLAS for the mesh if ray tracing manager is available // BLAS for this mesh is built lazily when TLAS is constructed from the draw
if (_engine->_rayManager) // context (RayTracingManager::buildTLASFromDrawContext). This keeps RT work
{ // centralized and avoids redundant builds on load.
_engine->_rayManager->getOrBuildBLAS(mesh);
}
GeoSurface surf{}; GeoSurface surf{};
surf.startIndex = 0; surf.startIndex = 0;

View File

@@ -5,6 +5,7 @@
#include "scene/vk_loader.h" #include "scene/vk_loader.h"
#include "scene/vk_scene.h" #include "scene/vk_scene.h"
#include <cstring> #include <cstring>
#include <numeric>
void RayTracingManager::init(DeviceManager *dev, ResourceManager *res) void RayTracingManager::init(DeviceManager *dev, ResourceManager *res)
{ {
@@ -51,24 +52,28 @@ void RayTracingManager::cleanup()
_tlasInstanceBuffer = {}; _tlasInstanceBuffer = {};
_tlasInstanceCapacity = 0; _tlasInstanceCapacity = 0;
} }
for (auto &kv: _blasByVB)
// Destroy any remaining cached BLAS that weren't queued for deferred destroy.
for (auto &kv : _blasByMesh)
{ {
if (kv.second.handle) const AccelStructureHandle &as = kv.second;
if (as.handle)
{ {
_vkDestroyAccelerationStructureKHR(dv, kv.second.handle, nullptr); _vkDestroyAccelerationStructureKHR(dv, as.handle, nullptr);
} }
if (kv.second.storage.buffer) if (as.storage.buffer)
{ {
_resources->destroy_buffer(kv.second.storage); _resources->destroy_buffer(as.storage);
} }
} }
_blasByVB.clear();
_blasByMesh.clear(); _blasByMesh.clear();
} }
void RayTracingManager::flushPendingDeletes() void RayTracingManager::flushPendingDeletes()
{ {
if (_pendingBlasDestroy.empty()) return; if (_pendingBlasDestroy.empty()) return;
fmt::println("[RT] flushPendingDeletes: destroying {} BLAS handles", _pendingBlasDestroy.size());
VkDevice dv = _device->device(); VkDevice dv = _device->device();
for (auto &as : _pendingBlasDestroy) for (auto &as : _pendingBlasDestroy)
{ {
@@ -94,13 +99,10 @@ static VkDeviceAddress get_buffer_address(VkDevice dev, VkBuffer buf)
AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<MeshAsset> &mesh) AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<MeshAsset> &mesh)
{ {
if (!mesh) return {}; if (!mesh) return {};
VkBuffer vb = mesh->meshBuffers.vertexBuffer.buffer;
if (auto it = _blasByVB.find(vb); it != _blasByVB.end())
{
return it->second;
}
if (auto it = _blasByMesh.find(mesh.get()); it != _blasByMesh.end()) if (auto it = _blasByMesh.find(mesh.get()); it != _blasByMesh.end())
{ {
fmt::println("[RT] getOrBuildBLAS reuse by mesh mesh='{}' handle={}", mesh->name,
static_cast<const void *>(it->second.handle));
return it->second; return it->second;
} }
@@ -113,6 +115,10 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
VkDeviceAddress vaddr = mesh->meshBuffers.vertexBufferAddress; VkDeviceAddress vaddr = mesh->meshBuffers.vertexBufferAddress;
VkDeviceAddress iaddr = mesh->meshBuffers.indexBufferAddress; VkDeviceAddress iaddr = mesh->meshBuffers.indexBufferAddress;
const uint32_t vcount = mesh->meshBuffers.vertexCount; const uint32_t vcount = mesh->meshBuffers.vertexCount;
VkBuffer vb = mesh->meshBuffers.vertexBuffer.buffer;
fmt::println("[RT] getOrBuildBLAS build mesh='{}' surfaces={} vcount={}", mesh->name,
mesh->surfaces.size(), vcount);
for (const auto &s: mesh->surfaces) for (const auto &s: mesh->surfaces)
{ {
@@ -199,6 +205,12 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
const VkAccelerationStructureBuildRangeInfoKHR* pRange = ranges.data(); const VkAccelerationStructureBuildRangeInfoKHR* pRange = ranges.data();
_resources->immediate_submit([&](VkCommandBuffer cmd) { _resources->immediate_submit([&](VkCommandBuffer cmd) {
// ppBuildRangeInfos is an array of infoCount pointers; we have 1 build info // ppBuildRangeInfos is an array of infoCount pointers; we have 1 build info
fmt::println("[RT] building BLAS for mesh='{}' geoms={} primsTotal={} storageSize={} scratchSize={}",
mesh->name,
geoms.size(),
maxPrim.empty() ? 0u : std::accumulate(maxPrim.begin(), maxPrim.end(), 0u),
sizes.accelerationStructureSize,
sizes.buildScratchSize);
_vkCmdBuildAccelerationStructuresKHR(cmd, 1, &buildInfo, &pRange); _vkCmdBuildAccelerationStructuresKHR(cmd, 1, &buildInfo, &pRange);
}); });
@@ -210,7 +222,6 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
dai.accelerationStructure = blas.handle; dai.accelerationStructure = blas.handle;
blas.deviceAddress = _vkGetAccelerationStructureDeviceAddressKHR(_device->device(), &dai); blas.deviceAddress = _vkGetAccelerationStructureDeviceAddressKHR(_device->device(), &dai);
_blasByVB.emplace(vb, blas);
_blasByMesh.emplace(mesh.get(), blas); _blasByMesh.emplace(mesh.get(), blas);
return blas; return blas;
} }
@@ -222,6 +233,10 @@ void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDevic
if (_tlas.handle || _tlas.storage.buffer) if (_tlas.handle || _tlas.storage.buffer)
{ {
AccelStructureHandle old = _tlas; AccelStructureHandle old = _tlas;
fmt::println("[RT] ensure_tlas_storage: scheduling old TLAS destroy handle={} buffer={} size={}",
static_cast<const void *>(old.handle),
static_cast<const void *>(old.storage.buffer),
old.storage.info.size);
dq.push_function([this, old]() { dq.push_function([this, old]() {
if (old.handle) if (old.handle)
_vkDestroyAccelerationStructureKHR(_device->device(), old.handle, nullptr); _vkDestroyAccelerationStructureKHR(_device->device(), old.handle, nullptr);
@@ -240,6 +255,11 @@ void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDevic
asci.buffer = _tlas.storage.buffer; asci.buffer = _tlas.storage.buffer;
asci.size = requiredASSize; asci.size = requiredASSize;
VK_CHECK(_vkCreateAccelerationStructureKHR(_device->device(), &asci, nullptr, &_tlas.handle)); VK_CHECK(_vkCreateAccelerationStructureKHR(_device->device(), &asci, nullptr, &_tlas.handle));
fmt::println("[RT] ensure_tlas_storage: created TLAS handle={} buffer={} size={}",
static_cast<const void *>(_tlas.handle),
static_cast<const void *>(_tlas.storage.buffer),
requiredASSize);
} }
VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const DrawContext &dc, DeletionQueue& dq) VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const DrawContext &dc, DeletionQueue& dq)
@@ -248,16 +268,17 @@ VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const Dra
std::vector<VkAccelerationStructureInstanceKHR> instances; std::vector<VkAccelerationStructureInstanceKHR> instances;
instances.reserve(dc.OpaqueSurfaces.size()); instances.reserve(dc.OpaqueSurfaces.size());
fmt::println("[RT] buildTLASFromDrawContext: opaqueSurfaces={} current TLAS handle={} buffer={}",
dc.OpaqueSurfaces.size(),
static_cast<const void *>(_tlas.handle),
static_cast<const void *>(_tlas.storage.buffer));
for (const auto &r: dc.OpaqueSurfaces) for (const auto &r: dc.OpaqueSurfaces)
{ {
// Find mesh BLAS by vertex buffer, then by mesh pointer (if available). // Find or lazily build BLAS by mesh pointer. We require sourceMesh
// for ray tracing; objects without it are skipped from TLAS.
AccelStructureHandle blas{}; AccelStructureHandle blas{};
auto it = _blasByVB.find(r.vertexBuffer); if (r.sourceMesh)
if (it != _blasByVB.end())
{
blas = it->second;
}
else if (r.sourceMesh)
{ {
auto itMesh = _blasByMesh.find(r.sourceMesh); auto itMesh = _blasByMesh.find(r.sourceMesh);
if (itMesh != _blasByMesh.end()) if (itMesh != _blasByMesh.end())
@@ -392,50 +413,32 @@ VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const Dra
void RayTracingManager::removeBLASForBuffer(VkBuffer vertexBuffer) void RayTracingManager::removeBLASForBuffer(VkBuffer vertexBuffer)
{ {
if (!vertexBuffer) return; if (!vertexBuffer) return;
VkDevice dv = _device->device();
auto it = _blasByVB.find(vertexBuffer);
if (it == _blasByVB.end()) return;
// Find any mesh whose vertex buffer matches and evict its BLAS.
for (auto it = _blasByMesh.begin(); it != _blasByMesh.end(); )
{
const MeshAsset *mesh = it->first;
if (mesh && mesh->meshBuffers.vertexBuffer.buffer == vertexBuffer)
{
// Defer destruction until after the next fence wait to avoid racing in-flight traces. // Defer destruction until after the next fence wait to avoid racing in-flight traces.
_pendingBlasDestroy.push_back(it->second); _pendingBlasDestroy.push_back(it->second);
it = _blasByMesh.erase(it);
// Also erase corresponding mesh-keyed entry if present
for (auto mit = _blasByMesh.begin(); mit != _blasByMesh.end(); )
{
if (mit->second.handle == it->second.handle)
{
mit = _blasByMesh.erase(mit);
} }
else else
{ {
++mit; ++it;
} }
} }
_blasByVB.erase(it);
} }
void RayTracingManager::removeBLASForMesh(const MeshAsset *mesh) void RayTracingManager::removeBLASForMesh(const MeshAsset *mesh)
{ {
if (!mesh) return; if (!mesh) return;
VkDevice dv = _device->device();
auto it = _blasByMesh.find(mesh); auto it = _blasByMesh.find(mesh);
if (it == _blasByMesh.end()) return; if (it == _blasByMesh.end()) return;
// Defer destruction until after the next fence wait to avoid racing in-flight traces. // Defer destruction until after the next fence wait to avoid racing in-flight traces.
_pendingBlasDestroy.push_back(it->second); _pendingBlasDestroy.push_back(it->second);
// Remove any VB-keyed entries that point to the same BLAS
for (auto vbit = _blasByVB.begin(); vbit != _blasByVB.end(); )
{
if (vbit->second.handle == it->second.handle)
{
vbit = _blasByVB.erase(vbit);
}
else
{
++vbit;
}
}
_blasByMesh.erase(it); _blasByMesh.erase(it);
} }

View File

@@ -51,8 +51,9 @@ private:
DeviceManager* _device{nullptr}; DeviceManager* _device{nullptr};
ResourceManager* _resources{nullptr}; ResourceManager* _resources{nullptr};
// BLAS cache by vertex buffer handle (legacy) and by mesh pointer (preferred) // BLAS cache per mesh. BLAS lifetime is tied to MeshAsset lifetime;
std::unordered_map<VkBuffer, AccelStructureHandle> _blasByVB; // when a mesh is destroyed or its GPU buffers are freed, the owning code
// must call removeBLASForMesh/removeBLASForBuffer to drop the cached BLAS.
std::unordered_map<const MeshAsset*, AccelStructureHandle> _blasByMesh; std::unordered_map<const MeshAsset*, AccelStructureHandle> _blasByMesh;
// TLAS + scratch / instance buffer (rebuilt per frame) // TLAS + scratch / instance buffer (rebuilt per frame)

View File

@@ -582,8 +582,17 @@ bool RenderGraph::compile()
const RGBufferRecord *rec = _resources.get_buffer(RGBufferHandle{id}); const RGBufferRecord *rec = _resources.get_buffer(RGBufferHandle{id});
barrier.buffer = rec ? rec->buffer : VK_NULL_HANDLE; barrier.buffer = rec ? rec->buffer : VK_NULL_HANDLE;
barrier.offset = 0; barrier.offset = 0;
// If size is unknown or 0 for imported buffers, use WHOLE_SIZE to satisfy VUID 01188 // For imported buffers we don't always know the exact VkBuffer size, so use WHOLE_SIZE
barrier.size = (rec && rec->size > 0) ? rec->size : VK_WHOLE_SIZE; // to avoid violating VUID-VkBufferMemoryBarrier2-size-01189. For transient buffers
// created by the graph, we track the exact size.
if (rec && !rec->imported && rec->size > 0)
{
barrier.size = rec->size;
}
else
{
barrier.size = VK_WHOLE_SIZE;
}
pass.preBufferBarriers.push_back(barrier); pass.preBufferBarriers.push_back(barrier);
if (rec && !rec->imported) if (rec && !rec->imported)

View File

@@ -218,6 +218,12 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
//< load_1 //< load_1
//> load_2 //> load_2
// we can estimate the descriptors we will need accurately // we can estimate the descriptors we will need accurately
fmt::println("[GLTF] loadGltf: materials={} meshes={} images={} samplers={} (creating descriptor pool)",
gltf.materials.size(),
gltf.meshes.size(),
gltf.images.size(),
gltf.samplers.size());
std::vector<DescriptorAllocatorGrowable::PoolSizeRatio> sizes = { std::vector<DescriptorAllocatorGrowable::PoolSizeRatio> sizes = {
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 3}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 3},
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3}, {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3},
@@ -225,6 +231,10 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
}; };
file.descriptorPool.init(engine->_deviceManager->device(), gltf.materials.size(), sizes); file.descriptorPool.init(engine->_deviceManager->device(), gltf.materials.size(), sizes);
fmt::println("[GLTF] loadGltf: descriptor pool initialized for '{}' (materials={})",
filePath,
gltf.materials.size());
//< load_2 //< load_2
//> load_samplers //> load_samplers
@@ -613,6 +623,10 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
} }
newmesh->meshBuffers = engine->_resourceManager->uploadMesh(indices, vertices); newmesh->meshBuffers = engine->_resourceManager->uploadMesh(indices, vertices);
// BLAS for this mesh will be built lazily from RayTracingManager::buildTLASFromDrawContext()
// when ray-traced shadows are enabled. This avoids redundant builds and concentrates
// RT work in one place.
// If CPU vectors ballooned for this mesh, release capacity back to the OS // If CPU vectors ballooned for this mesh, release capacity back to the OS
auto shrink_if_huge = [](auto &vec, size_t elemSizeBytes) { auto shrink_if_huge = [](auto &vec, size_t elemSizeBytes) {
const size_t capBytes = vec.capacity() * elemSizeBytes; const size_t capBytes = vec.capacity() * elemSizeBytes;
@@ -626,10 +640,6 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
}; };
shrink_if_huge(indices, sizeof(uint32_t)); shrink_if_huge(indices, sizeof(uint32_t));
shrink_if_huge(vertices, sizeof(Vertex)); shrink_if_huge(vertices, sizeof(Vertex));
if (engine->_rayManager)
{
engine->_rayManager->getOrBuildBLAS(newmesh);
}
} }
//> load_nodes //> load_nodes
// load all nodes and their meshes // load all nodes and their meshes