EDIT: BLAS build is now per-frame async
This commit is contained in:
@@ -1185,6 +1185,10 @@ void VulkanEngine::run()
|
||||
// Safe to destroy any BLAS queued for deletion now that the previous frame is idle.
|
||||
if (_rayManager) { _rayManager->flushPendingDeletes(); }
|
||||
|
||||
// Progress queued BLAS builds over multiple frames to avoid large
|
||||
// stalls when many meshes require ray tracing structures at once.
|
||||
if (_rayManager) { _rayManager->pump_blas_builds(1); }
|
||||
|
||||
// Commit any completed async IBL load now that the GPU is idle.
|
||||
if (_iblManager && _pendingIBLRequest.active)
|
||||
{
|
||||
|
||||
@@ -35,6 +35,8 @@ void RayTracingManager::cleanup()
|
||||
VkDevice dv = _device->device();
|
||||
// Destroy any deferred BLAS first
|
||||
flushPendingDeletes();
|
||||
_blasBuildQueue.clear();
|
||||
_blasPendingMeshes.clear();
|
||||
|
||||
if (_tlas.handle)
|
||||
{
|
||||
@@ -100,21 +102,53 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
|
||||
{
|
||||
if (!mesh) return {};
|
||||
|
||||
// If uploads are deferred, ensure any pending mesh buffer uploads are flushed
|
||||
// before building a BLAS that reads from those GPU buffers.
|
||||
if (_resources && _resources->deferred_uploads() && _resources->has_pending_uploads())
|
||||
{
|
||||
fmt::println("[RT] getOrBuildBLAS: flushing pending resource uploads before BLAS build");
|
||||
_resources->process_queued_uploads_immediate();
|
||||
}
|
||||
const MeshAsset* key = mesh.get();
|
||||
|
||||
if (auto it = _blasByMesh.find(mesh.get()); it != _blasByMesh.end())
|
||||
// If a BLAS is already cached (even an empty sentinel), return it directly.
|
||||
if (auto it = _blasByMesh.find(key); it != _blasByMesh.end())
|
||||
{
|
||||
fmt::println("[RT] getOrBuildBLAS reuse by mesh mesh='{}' handle={}", mesh->name,
|
||||
static_cast<const void *>(it->second.handle));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
// If a build is already queued or in progress for this mesh, do not enqueue
|
||||
// another job; simply report "not ready yet".
|
||||
if (_blasPendingMeshes.find(key) != _blasPendingMeshes.end())
|
||||
{
|
||||
fmt::println("[RT] getOrBuildBLAS pending build mesh='{}'", mesh->name);
|
||||
return {};
|
||||
}
|
||||
|
||||
// If uploads are deferred, ensure any pending mesh buffer uploads are flushed
|
||||
// before queuing a BLAS that will read from those GPU buffers.
|
||||
if (_resources && _resources->deferred_uploads() && _resources->has_pending_uploads())
|
||||
{
|
||||
fmt::println("[RT] getOrBuildBLAS: flushing pending resource uploads before queuing BLAS build");
|
||||
_resources->process_queued_uploads_immediate();
|
||||
}
|
||||
|
||||
fmt::println("[RT] getOrBuildBLAS queue build mesh='{}'", mesh->name);
|
||||
_blasPendingMeshes.insert(key);
|
||||
_blasBuildQueue.push_back(PendingBlasBuild{key});
|
||||
|
||||
// BLAS will be built asynchronously by pump_blas_builds(); until then,
|
||||
// callers should treat the empty handle as "not ready yet".
|
||||
return {};
|
||||
}
|
||||
|
||||
AccelStructureHandle RayTracingManager::build_blas_for_mesh(const MeshAsset *mesh)
|
||||
{
|
||||
if (!mesh || !_resources || !_device) return {};
|
||||
|
||||
// If uploads are deferred, ensure any pending mesh buffer uploads are flushed
|
||||
// before building a BLAS that reads from those GPU buffers.
|
||||
if (_resources->deferred_uploads() && _resources->has_pending_uploads())
|
||||
{
|
||||
fmt::println("[RT] build_blas_for_mesh: flushing pending resource uploads before BLAS build");
|
||||
_resources->process_queued_uploads_immediate();
|
||||
}
|
||||
|
||||
// Build BLAS with one geometry per surface (skip empty primitives)
|
||||
std::vector<VkAccelerationStructureGeometryKHR> geoms;
|
||||
std::vector<VkAccelerationStructureBuildRangeInfoKHR> ranges;
|
||||
@@ -126,7 +160,7 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
|
||||
const uint32_t vcount = mesh->meshBuffers.vertexCount;
|
||||
VkBuffer vb = mesh->meshBuffers.vertexBuffer.buffer;
|
||||
|
||||
fmt::println("[RT] getOrBuildBLAS build mesh='{}' surfaces={} vcount={}", mesh->name,
|
||||
fmt::println("[RT] build_blas_for_mesh mesh='{}' surfaces={} vcount={}", mesh->name,
|
||||
mesh->surfaces.size(), vcount);
|
||||
|
||||
for (const auto &s: mesh->surfaces)
|
||||
@@ -162,9 +196,11 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
|
||||
ranges.push_back(r);
|
||||
}
|
||||
|
||||
// If no valid geometries, skip BLAS build
|
||||
// If no valid geometries, record an empty sentinel to avoid re-queuing.
|
||||
if (geoms.empty())
|
||||
{
|
||||
fmt::println("[RT] build_blas_for_mesh: mesh='{}' has no primitives; skipping BLAS", mesh->name);
|
||||
_blasByMesh.emplace(mesh, AccelStructureHandle{});
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -231,10 +267,50 @@ AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<Mes
|
||||
dai.accelerationStructure = blas.handle;
|
||||
blas.deviceAddress = _vkGetAccelerationStructureDeviceAddressKHR(_device->device(), &dai);
|
||||
|
||||
_blasByMesh.emplace(mesh.get(), blas);
|
||||
_blasByMesh.emplace(mesh, blas);
|
||||
return blas;
|
||||
}
|
||||
|
||||
void RayTracingManager::pump_blas_builds(uint32_t max_builds_per_frame)
|
||||
{
|
||||
if (max_builds_per_frame == 0 || _blasBuildQueue.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t built = 0;
|
||||
|
||||
while (built < max_builds_per_frame && !_blasBuildQueue.empty())
|
||||
{
|
||||
PendingBlasBuild job = _blasBuildQueue.front();
|
||||
_blasBuildQueue.pop_front();
|
||||
|
||||
const MeshAsset* mesh = job.mesh;
|
||||
if (mesh)
|
||||
{
|
||||
// Drop the pending flag for this mesh now; if the build ends up
|
||||
// with an empty handle, getOrBuildBLAS will see the cache entry
|
||||
// (including the empty sentinel) and avoid re-queuing.
|
||||
_blasPendingMeshes.erase(mesh);
|
||||
|
||||
// Skip if a BLAS was already created meanwhile.
|
||||
if (_blasByMesh.find(mesh) == _blasByMesh.end())
|
||||
{
|
||||
AccelStructureHandle blas = build_blas_for_mesh(mesh);
|
||||
if (blas.handle)
|
||||
{
|
||||
++built;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Mesh pointer is null; just drop the pending flag.
|
||||
_blasPendingMeshes.erase(mesh);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize /*requiredScratch*/, DeletionQueue& dq)
|
||||
{
|
||||
// Recreate TLAS storage if size grows. Defer destruction to the frame DQ to
|
||||
@@ -296,7 +372,10 @@ VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const Dra
|
||||
}
|
||||
else
|
||||
{
|
||||
// Try to build on the fly if the mesh is still alive (non-owning shared_ptr wrapper).
|
||||
// Queue an async BLAS build if the mesh is still alive
|
||||
// (non-owning shared_ptr wrapper). The BLAS will be built
|
||||
// over subsequent frames by pump_blas_builds(); until then,
|
||||
// this instance will be skipped.
|
||||
std::shared_ptr<MeshAsset> nonOwning(const_cast<MeshAsset *>(r.sourceMesh), [](MeshAsset *) {});
|
||||
blas = getOrBuildBLAS(nonOwning);
|
||||
}
|
||||
@@ -423,6 +502,24 @@ void RayTracingManager::removeBLASForBuffer(VkBuffer vertexBuffer)
|
||||
{
|
||||
if (!vertexBuffer) return;
|
||||
|
||||
// Drop any queued builds referencing this vertex buffer.
|
||||
if (!_blasBuildQueue.empty())
|
||||
{
|
||||
for (auto itQ = _blasBuildQueue.begin(); itQ != _blasBuildQueue.end(); )
|
||||
{
|
||||
const MeshAsset* mesh = itQ->mesh;
|
||||
if (mesh && mesh->meshBuffers.vertexBuffer.buffer == vertexBuffer)
|
||||
{
|
||||
_blasPendingMeshes.erase(mesh);
|
||||
itQ = _blasBuildQueue.erase(itQ);
|
||||
}
|
||||
else
|
||||
{
|
||||
++itQ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find any mesh whose vertex buffer matches and evict its BLAS.
|
||||
for (auto it = _blasByMesh.begin(); it != _blasByMesh.end(); )
|
||||
{
|
||||
@@ -443,6 +540,24 @@ void RayTracingManager::removeBLASForBuffer(VkBuffer vertexBuffer)
|
||||
void RayTracingManager::removeBLASForMesh(const MeshAsset *mesh)
|
||||
{
|
||||
if (!mesh) return;
|
||||
|
||||
// Drop any queued builds for this mesh.
|
||||
if (!_blasBuildQueue.empty())
|
||||
{
|
||||
for (auto itQ = _blasBuildQueue.begin(); itQ != _blasBuildQueue.end(); )
|
||||
{
|
||||
if (itQ->mesh == mesh)
|
||||
{
|
||||
itQ = _blasBuildQueue.erase(itQ);
|
||||
}
|
||||
else
|
||||
{
|
||||
++itQ;
|
||||
}
|
||||
}
|
||||
}
|
||||
_blasPendingMeshes.erase(mesh);
|
||||
|
||||
auto it = _blasByMesh.find(mesh);
|
||||
if (it == _blasByMesh.end()) return;
|
||||
|
||||
|
||||
@@ -1,20 +1,22 @@
|
||||
#pragma once
|
||||
#include <core/types.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
class DeviceManager;
|
||||
class ResourceManager;
|
||||
struct DrawContext;
|
||||
struct MeshAsset;
|
||||
|
||||
struct AccelStructureHandle {
|
||||
VkAccelerationStructureKHR handle{VK_NULL_HANDLE};
|
||||
AllocatedBuffer storage{}; // buffer that backs the AS
|
||||
VkDeviceAddress deviceAddress{0};
|
||||
};
|
||||
|
||||
#include <core/types.h>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
|
||||
class DeviceManager;
|
||||
class ResourceManager;
|
||||
struct DrawContext;
|
||||
struct MeshAsset;
|
||||
|
||||
struct AccelStructureHandle {
|
||||
VkAccelerationStructureKHR handle{VK_NULL_HANDLE};
|
||||
AllocatedBuffer storage{}; // buffer that backs the AS
|
||||
VkDeviceAddress deviceAddress{0};
|
||||
};
|
||||
|
||||
// Ray tracing helper that caches BLAS per mesh and rebuilds TLAS per frame
|
||||
// for hybrid/full ray query shadows. See docs/RayTracing.md.
|
||||
class RayTracingManager {
|
||||
@@ -22,8 +24,16 @@ public:
|
||||
void init(DeviceManager* dev, ResourceManager* res);
|
||||
void cleanup();
|
||||
|
||||
// Build (or get) BLAS for a mesh. Safe to call multiple times.
|
||||
AccelStructureHandle getOrBuildBLAS(const std::shared_ptr<MeshAsset>& mesh);
|
||||
// Queue a BLAS build for a mesh (if not already built or queued) and
|
||||
// return the cached handle when available. Safe to call multiple times.
|
||||
// When builds are pending, this may return an empty handle; callers
|
||||
// should treat that as "BLAS not ready yet" and skip ray instances.
|
||||
AccelStructureHandle getOrBuildBLAS(const std::shared_ptr<MeshAsset>& mesh);
|
||||
|
||||
// Progress asynchronous BLAS builds. Call once per frame after waiting
|
||||
// for the previous frame's GPU fence. max_builds_per_frame controls how
|
||||
// many BLAS are built in this pump to spread work over multiple frames.
|
||||
void pump_blas_builds(uint32_t max_builds_per_frame = 1);
|
||||
|
||||
// Rebuild TLAS from current draw context; returns TLAS handle (or null if unavailable)
|
||||
// Destruction of previous TLAS resources is deferred via the provided frame deletion queue
|
||||
@@ -41,12 +51,12 @@ public:
|
||||
void removeBLASForMesh(const MeshAsset *mesh);
|
||||
|
||||
private:
|
||||
// function pointers (resolved on init)
|
||||
PFN_vkCreateAccelerationStructureKHR _vkCreateAccelerationStructureKHR{};
|
||||
PFN_vkDestroyAccelerationStructureKHR _vkDestroyAccelerationStructureKHR{};
|
||||
PFN_vkGetAccelerationStructureBuildSizesKHR _vkGetAccelerationStructureBuildSizesKHR{};
|
||||
PFN_vkCmdBuildAccelerationStructuresKHR _vkCmdBuildAccelerationStructuresKHR{};
|
||||
PFN_vkGetAccelerationStructureDeviceAddressKHR _vkGetAccelerationStructureDeviceAddressKHR{};
|
||||
// function pointers (resolved on init)
|
||||
PFN_vkCreateAccelerationStructureKHR _vkCreateAccelerationStructureKHR{};
|
||||
PFN_vkDestroyAccelerationStructureKHR _vkDestroyAccelerationStructureKHR{};
|
||||
PFN_vkGetAccelerationStructureBuildSizesKHR _vkGetAccelerationStructureBuildSizesKHR{};
|
||||
PFN_vkCmdBuildAccelerationStructuresKHR _vkCmdBuildAccelerationStructuresKHR{};
|
||||
PFN_vkGetAccelerationStructureDeviceAddressKHR _vkGetAccelerationStructureDeviceAddressKHR{};
|
||||
|
||||
DeviceManager* _device{nullptr};
|
||||
ResourceManager* _resources{nullptr};
|
||||
@@ -55,6 +65,16 @@ private:
|
||||
// when a mesh is destroyed or its GPU buffers are freed, the owning code
|
||||
// must call removeBLASForMesh/removeBLASForBuffer to drop the cached BLAS.
|
||||
std::unordered_map<const MeshAsset*, AccelStructureHandle> _blasByMesh;
|
||||
|
||||
struct PendingBlasBuild
|
||||
{
|
||||
const MeshAsset* mesh{nullptr};
|
||||
};
|
||||
|
||||
// Queue of BLAS builds to execute over multiple frames.
|
||||
std::deque<PendingBlasBuild> _blasBuildQueue;
|
||||
// Tracks meshes that have a queued or in-progress BLAS build.
|
||||
std::unordered_set<const MeshAsset*> _blasPendingMeshes;
|
||||
|
||||
// TLAS + scratch / instance buffer (rebuilt per frame)
|
||||
AccelStructureHandle _tlas{};
|
||||
@@ -68,5 +88,6 @@ private:
|
||||
VkDeviceSize _minScratchAlignment{256};
|
||||
|
||||
void ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize requiredScratch, DeletionQueue& frameDQ);
|
||||
AccelStructureHandle build_blas_for_mesh(const MeshAsset* mesh);
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user