ADD: Ray-traced shadows with clipmap shadow blending

2025-10-29 00:35:51 +09:00
parent 4a47936414
commit 97177dade3
21 changed files with 655 additions and 19 deletions

View File

@@ -35,7 +35,7 @@ foreach(GLSL ${GLSL_SOURCE_FILES})
message(STATUS ${GLSL})
add_custom_command(
OUTPUT ${SPIRV}
COMMAND ${GLSL_VALIDATOR} -V ${GLSL} -o ${SPIRV}
COMMAND ${GLSL_VALIDATOR} -V --target-env vulkan1.2 ${GLSL} -o ${SPIRV}
DEPENDS ${GLSL})
list(APPEND SPIRV_BINARY_FILES ${SPIRV})
endforeach(GLSL)

View File

@@ -1,3 +1,9 @@
Get-ChildItem -Path "shaders" -Include *.frag,*.vert,*.comp,*.geom,*.tesc,*.tese,*.mesh,*.task,*.rgen,*.rint,*.rahit,*.rchit,*.rmiss,*.rcall -Recurse | ForEach-Object {
glslc $_.FullName -o "$($_.FullName).spv"
$COMMON = @("--target-env=vulkan1.3", "-O", "-g", "-Werror", "-I", "shaders")
Get-ChildItem -Path "shaders" -Include *.frag,*.vert,*.comp,*.geom,*.tesc,*.tese,*.mesh,*.task,*.rgen,*.rint,*.rahit,*.rchit,*.rmiss,*.rcall -Recurse |
ForEach-Object {
$extra = @()
if ($_.Extension -eq ".mesh") { $extra += "-fshader-stage=mesh" }
elseif ($_.Extension -eq ".task") { $extra += "-fshader-stage=task" }
glslc $_.FullName @COMMON @extra -o "$($_.FullName).spv"
}

View File

@@ -1,5 +1,6 @@
#version 450
#version 460
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_ray_query : require
#include "input_structures.glsl"
layout(location=0) in vec2 inUV;
@@ -9,6 +10,10 @@ layout(set=1, binding=0) uniform sampler2D posTex;
layout(set=1, binding=1) uniform sampler2D normalTex;
layout(set=1, binding=2) uniform sampler2D albedoTex;
layout(set=2, binding=0) uniform sampler2D shadowTex[4];
// TLAS for ray query (optional, guarded by sceneData.rtOptions.x)
#ifdef GL_EXT_ray_query
layout(set=0, binding=1) uniform accelerationStructureEXT topLevelAS;
#endif
// Tunables for shadow quality and blending
// Border smoothing width in light-space NDC (0..1). Larger = wider cross-fade.
@@ -23,6 +28,9 @@ const float SHADOW_NORMAL_OFFSET = 0.0025;
const float SHADOW_RPDB_SCALE = 1.0;
// Minimum clamp to keep a tiny bias even on perpendicular receivers
const float SHADOW_MIN_BIAS = 1e-5;
// Ray query safety params
const float SHADOW_RAY_TMIN = 0.02; // start a bit away from the surface
const float SHADOW_RAY_ORIGIN_BIAS = 0.01; // world units
const float PI = 3.14159265359;
@@ -182,13 +190,76 @@ float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L)
{
vec3 wp = worldPos + N * SHADOW_NORMAL_OFFSET * (0.5 + 0.5 * (1.0 - max(dot(N, L), 0.0)));
// RT-only mode: cast a ray and skip clipmap sampling entirely
if (sceneData.rtOptions.z == 2u) {
#ifdef GL_EXT_ray_query
float farR = max(max(sceneData.cascadeSplitsView.x, sceneData.cascadeSplitsView.y),
max(sceneData.cascadeSplitsView.z, sceneData.cascadeSplitsView.w));
rayQueryEXT rq;
rayQueryInitializeEXT(rq, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsOpaqueEXT,
0xFF, wp + N * SHADOW_RAY_ORIGIN_BIAS, SHADOW_RAY_TMIN, L, farR);
while (rayQueryProceedEXT(rq)) { }
bool hit = (rayQueryGetIntersectionTypeEXT(rq, true) != gl_RayQueryCommittedIntersectionNoneEXT);
return hit ? 0.0 : 1.0;
#else
// Fall through to the clipmap PCF path below when ray query is unavailable at compile time
;
#endif
}
CascadeMix cm = computeCascadeMix(wp);
float v0 = sampleCascadeShadow(cm.i0, wp, N, L);
if (cm.w1 <= 0.0)
{
// Hybrid ray query assist (terminate-on-first-hit ray cast along L, toward the light)
#ifdef GL_EXT_ray_query
if (sceneData.rtOptions.x == 1u)
{
float NoL = max(dot(N, L), 0.0);
uint mask = sceneData.rtOptions.y;
bool cascadeEnabled = ((mask >> cm.i0) & 1u) == 1u;
if (cascadeEnabled && NoL < sceneData.rtParams.x)
{
float maxT = sceneData.cascadeSplitsView[cm.i0];
rayQueryEXT rq;
// tmin: small offset to avoid self-hits
rayQueryInitializeEXT(rq, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsOpaqueEXT,
0xFF, wp + N * SHADOW_RAY_ORIGIN_BIAS, SHADOW_RAY_TMIN, L, maxT);
bool hit = false;
while (rayQueryProceedEXT(rq)) { }
hit = (rayQueryGetIntersectionTypeEXT(rq, true) != gl_RayQueryCommittedIntersectionNoneEXT);
if (hit) v0 = min(v0, 0.0);
}
}
#endif
return v0;
}
float v1 = sampleCascadeShadow(cm.i1, wp, N, L);
return mix(v0, v1, clamp(cm.w1, 0.0, 1.0));
float vis = mix(v0, v1, clamp(cm.w1, 0.0, 1.0));
// Hybrid assist across blended border: take min if a ray hits in either cascade
#ifdef GL_EXT_ray_query
if (sceneData.rtOptions.x == 1u)
{
float NoL = max(dot(N, L), 0.0);
uint mask = sceneData.rtOptions.y;
bool e0 = ((mask >> cm.i0) & 1u) == 1u;
bool e1 = ((mask >> cm.i1) & 1u) == 1u;
if (NoL < sceneData.rtParams.x && (e0 || e1))
{
float maxT0 = sceneData.cascadeSplitsView[cm.i0];
float maxT1 = sceneData.cascadeSplitsView[cm.i1];
float maxT = max(maxT0, maxT1);
rayQueryEXT rq;
rayQueryInitializeEXT(rq, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsOpaqueEXT,
0xFF, wp + N * SHADOW_RAY_ORIGIN_BIAS, SHADOW_RAY_TMIN, L, maxT);
while (rayQueryProceedEXT(rq)) { }
bool hit = (rayQueryGetIntersectionTypeEXT(rq, true) != gl_RayQueryCommittedIntersectionNoneEXT);
if (hit) vis = min(vis, 0.0);
}
}
#endif
return vis;
}
vec3 fresnelSchlick(float cosTheta, vec3 F0)
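
The hunk above implements a three-way policy: mode 2 returns the ray result alone, mode 1 keeps the clipmap cascade cross-fade and then darkens the result with a ray test when N·L falls below rtParams.x on an assisted cascade, and mode 0 is plain PCF as before. The C++ reference below mirrors that decision flow outside GLSL; it is an illustrative sketch only — CascadeMix, the v0/v1 inputs and rayHit stand in for the shader's values and are not engine code.

#include <algorithm>
#include <cstdint>

struct CascadeMix { int i0; int i1; float w1; }; // near cascade, far cascade, blend weight

// v0/v1: PCF visibility of the two cascades, rayHit: result of the occlusion ray
float shadowVisibility(uint32_t mode, uint32_t cascadeMask, float NoL, float NoLThreshold,
                       const CascadeMix &cm, float v0, float v1, bool rayHit)
{
    if (mode == 2u)                        // RT only: the ray decides everything
        return rayHit ? 0.0f : 1.0f;
    float vis = (cm.w1 <= 0.0f)
        ? v0                               // single cascade, no border cross-fade
        : v0 + (v1 - v0) * std::clamp(cm.w1, 0.0f, 1.0f);
    const bool assisted = ((cascadeMask >> cm.i0) & 1u) != 0u ||
                          (cm.w1 > 0.0f && ((cascadeMask >> cm.i1) & 1u) != 0u);
    if (mode == 1u && assisted && NoL < NoLThreshold && rayHit)
        vis = 0.0f;                        // hybrid assist: a ray hit forces full shadow
    return vis;
}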

View File

@@ -16,6 +16,12 @@ layout(set = 0, binding = 0) uniform SceneData{
mat4 lightViewProjCascades[4];
// View-space split distances for selecting cascades (x,y,z,w)
vec4 cascadeSplitsView;
// Ray-query settings (packed)
// rtOptions.x = enabled (1/0)
// rtOptions.y = cascade bitmask (bit i => cascade i assisted)
// rtOptions.z = shadow mode (0 = clipmap, 1 = clipmap + RT assist, 2 = RT only)
uvec4 rtOptions;
// rtParams.x = N·L threshold; others reserved
vec4 rtParams;
} sceneData;
layout(set = 1, binding = 0) uniform GLTFMaterialData{

View File

@@ -35,6 +35,8 @@ add_executable (vulkan_engine
core/config.h
core/vk_engine.h
core/vk_engine.cpp
core/vk_raytracing.h
core/vk_raytracing.cpp
# render
render/vk_pipelines.h
render/vk_pipelines.cpp

View File

@@ -282,6 +282,11 @@ std::shared_ptr<MeshAsset> AssetManager::createMesh(const std::string &name,
auto mesh = std::make_shared<MeshAsset>();
mesh->name = name;
mesh->meshBuffers = _engine->_resourceManager->uploadMesh(indices, vertices);
// Build BLAS for the mesh if ray tracing manager is available
if (_engine->_rayManager)
{
_engine->_rayManager->getOrBuildBLAS(mesh);
}
GeoSurface surf{};
surf.startIndex = 0;

View File

@@ -29,6 +29,16 @@ struct FrameResources;
struct SDL_Window;
class AssetManager;
class RenderGraph;
class RayTracingManager;
struct ShadowSettings
{
// 0 = Clipmap only, 1 = Clipmap + RT assist, 2 = RT only
uint32_t mode = 0;
bool hybridRayQueryEnabled = false; // derived convenience: (mode != 0)
uint32_t hybridRayCascadesMask = 0b1110; // bit i => cascade i uses ray query assist (default: 1..3)
float hybridRayNoLThreshold = 0.25f; // trigger when N·L below this (mode==1)
};
class EngineContext
{
@@ -60,6 +70,12 @@ public:
// Assets
AssetManager* assets = nullptr; // non-owning pointer to central AssetManager
// Runtime settings visible to passes/shaders
ShadowSettings shadowSettings{};
// Ray tracing manager (optional, nullptr if unsupported)
RayTracingManager* ray = nullptr;
// Accessors
DeviceManager *getDevice() const { return device.get(); }
ResourceManager *getResources() const { return resources.get(); }

View File

@@ -13,6 +13,11 @@ void DescriptorManager::init(DeviceManager *deviceManager)
}
{
DescriptorLayoutBuilder builder;
builder.add_binding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
if (_deviceManager->supportsAccelerationStructure())
{
// TLAS for ray query (set=0,binding=1)
builder.add_binding(1, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
}
_gpuSceneDataDescriptorLayout = builder.build(
_deviceManager->device(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
}

View File

@@ -75,6 +75,21 @@ void DescriptorWriter::write_image(int binding, VkImageView image, VkSampler sam
writes.push_back(write);
}
void DescriptorWriter::write_acceleration_structure(int binding, VkAccelerationStructureKHR as)
{
VkWriteDescriptorSetAccelerationStructureKHR &acc = accelInfos.emplace_back(
VkWriteDescriptorSetAccelerationStructureKHR{ VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR });
acc.accelerationStructureCount = 1;
acc.pAccelerationStructures = &as;
VkWriteDescriptorSet write{ VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET };
write.dstBinding = binding;
write.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
write.descriptorCount = 1;
write.pNext = &acc;
writes.push_back(write);
}
void DescriptorWriter::clear()
{
imageInfos.clear();

View File

@@ -19,12 +19,15 @@ struct DescriptorWriter
{
std::deque<VkDescriptorImageInfo> imageInfos;
std::deque<VkDescriptorBufferInfo> bufferInfos;
std::deque<VkWriteDescriptorSetAccelerationStructureKHR> accelInfos;
std::vector<VkWriteDescriptorSet> writes;
void write_image(int binding, VkImageView image, VkSampler sampler, VkImageLayout layout, VkDescriptorType type);
void write_buffer(int binding, VkBuffer buffer, size_t size, size_t offset, VkDescriptorType type);
void write_acceleration_structure(int binding, VkAccelerationStructureKHR as);
void clear();
void update_set(VkDevice device, VkDescriptorSet set);
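
For orientation, this is how the new writer method is used when populating the per-frame scene descriptor set; it mirrors the LightingPass hunk later in this commit. The handle names (sceneUBO, tlas, globalDescriptor, device) are placeholders, not engine members.

// Sketch: binding 0 = SceneData UBO, binding 1 = TLAS (written only when one exists)
DescriptorWriter writer;
writer.write_buffer(0, sceneUBO.buffer, sizeof(GPUSceneData), 0,
                    VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
if (tlas != VK_NULL_HANDLE)
    writer.write_acceleration_structure(1, tlas); // chains the AS write info via pNext
writer.update_set(device, globalDescriptor);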

View File

@@ -31,7 +31,7 @@ void DeviceManager::init_vulkan(SDL_Window *window)
features12.descriptorIndexing = true;
//use vkbootstrap to select a gpu.
//We want a gpu that can write to the SDL surface and supports vulkan 1.2
//We want a gpu that can write to the SDL surface and supports vulkan 1.3
vkb::PhysicalDeviceSelector selector{vkb_inst};
vkb::PhysicalDevice physicalDevice = selector
.set_minimum_version(1, 3)
@@ -42,10 +42,44 @@ void DeviceManager::init_vulkan(SDL_Window *window)
.value();
//physicalDevice.features.
//create the final vulkan device
// Enable ray tracing extensions on the physical device if supported (before creating the DeviceBuilder)
// Query ray tracing capability on the chosen physical device
{
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelFeat{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR };
VkPhysicalDeviceRayQueryFeaturesKHR rayqFeat{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR,
.pNext = &accelFeat };
VkPhysicalDeviceFeatures2 feats2{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = &rayqFeat };
vkGetPhysicalDeviceFeatures2(physicalDevice.physical_device, &feats2);
_rayQuerySupported = (rayqFeat.rayQuery == VK_TRUE);
_accelStructSupported = (accelFeat.accelerationStructure == VK_TRUE);
fmt::print("[Device] RayQuery support: {} | AccelStruct: {}\n",
_rayQuerySupported ? "yes" : "no",
_accelStructSupported ? "yes" : "no");
if (_rayQuerySupported && _accelStructSupported)
{
physicalDevice.enable_extension_if_present(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
physicalDevice.enable_extension_if_present(VK_KHR_RAY_QUERY_EXTENSION_NAME);
physicalDevice.enable_extension_if_present(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME);
}
}
//create the final vulkan device
vkb::DeviceBuilder deviceBuilder{physicalDevice};
// Enable ray query + accel struct features in device create pNext if supported
if (_rayQuerySupported && _accelStructSupported)
{
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR };
accelReq.accelerationStructure = VK_TRUE;
VkPhysicalDeviceRayQueryFeaturesKHR rayqReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR };
rayqReq.pNext = &accelReq;
rayqReq.rayQuery = VK_TRUE;
deviceBuilder.add_pNext(&rayqReq);
}
vkb::Device vkbDevice = deviceBuilder.build().value();
// Get the VkDevice handle used in the rest of a vulkan application
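
For readers not using vk-bootstrap: deviceBuilder.add_pNext(&rayqReq) appends the feature structs to the VkDeviceCreateInfo pNext chain at device creation. A hand-rolled equivalent is sketched below under stated assumptions (single graphics queue at family 0, extension list trimmed to the ray-tracing set, the engine's other 1.2/1.3 features omitted); note that acceleration structures additionally require bufferDeviceAddress from the Vulkan 1.2 feature set.

#include <vulkan/vulkan.h>

VkDevice create_device_with_ray_query(VkPhysicalDevice gpu)
{
    // Feature chain: ray query -> acceleration structure -> Vulkan 1.2 features
    VkPhysicalDeviceVulkan12Features f12{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
    f12.bufferDeviceAddress = VK_TRUE;      // required by VK_KHR_acceleration_structure

    VkPhysicalDeviceAccelerationStructureFeaturesKHR accel{
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR};
    accel.pNext = &f12;
    accel.accelerationStructure = VK_TRUE;

    VkPhysicalDeviceRayQueryFeaturesKHR rayq{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR};
    rayq.pNext = &accel;
    rayq.rayQuery = VK_TRUE;

    const float priority = 1.0f;
    VkDeviceQueueCreateInfo queue{VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO};
    queue.queueFamilyIndex = 0;             // assumption: family 0 is the graphics queue
    queue.queueCount = 1;
    queue.pQueuePriorities = &priority;

    const char *extensions[] = {
        VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME,
        VK_KHR_RAY_QUERY_EXTENSION_NAME,
        VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME,
    };

    VkDeviceCreateInfo ci{VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO};
    ci.pNext = &rayq;                       // what add_pNext() contributes
    ci.queueCreateInfoCount = 1;
    ci.pQueueCreateInfos = &queue;
    ci.enabledExtensionCount = 3;
    ci.ppEnabledExtensionNames = extensions;

    VkDevice device = VK_NULL_HANDLE;
    vkCreateDevice(gpu, &ci, nullptr, &device);
    return device;
}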

View File

@@ -19,6 +19,10 @@ public:
VmaAllocator allocator() const { return _allocator; }
VkDebugUtilsMessengerEXT debugMessenger() { return _debug_messenger; }
// Ray tracing capabilities (queried at init; not necessarily enabled)
bool supportsRayQuery() const { return _rayQuerySupported; }
bool supportsAccelerationStructure() const { return _accelStructSupported; }
private:
VkInstance _instance = nullptr;
VkDebugUtilsMessengerEXT _debug_messenger = nullptr;
@@ -30,4 +34,8 @@ private:
VmaAllocator _allocator = nullptr;
DeletionQueue _deletionQueue;
// Cached feature support flags
bool _rayQuerySupported{false};
bool _accelStructSupported{false};
};

View File

@@ -100,6 +100,14 @@ void VulkanEngine::init()
_assetManager->init(this);
_context->assets = _assetManager.get();
// Optional ray tracing manager if supported and extensions enabled
if (_deviceManager->supportsRayQuery() && _deviceManager->supportsAccelerationStructure())
{
_rayManager = std::make_unique<RayTracingManager>();
_rayManager->init(_deviceManager.get(), _resourceManager.get());
_context->ray = _rayManager.get();
}
_sceneManager = std::make_unique<SceneManager>();
_sceneManager->init(_context.get());
_context->scene = _sceneManager.get();
@@ -128,7 +136,7 @@ void VulkanEngine::init()
auto imguiPass = std::make_unique<ImGuiPass>();
_renderPassManager->setImGuiPass(std::move(imguiPass));
const std::string structurePath = _assetManager->modelPath("resi.glb");
const std::string structurePath = _assetManager->modelPath("police_office.glb");
const auto structureFile = _assetManager->loadGLTF(structurePath);
assert(structureFile.has_value());
@@ -258,6 +266,11 @@ void VulkanEngine::cleanup()
void VulkanEngine::draw()
{
_sceneManager->update_scene();
// Build or update TLAS for current frame if RT mode enabled (1 or 2)
if (_rayManager && _context->shadowSettings.mode != 0u)
{
_rayManager->buildTLASFromDrawContext(_context->getMainDrawContext());
}
//> frame_clear
//wait until the gpu has finished rendering the last frame. Timeout of 1 second
VK_CHECK(vkWaitForFences(_deviceManager->device(), 1, &get_current_frame()._renderFence, true, 1000000000));
@@ -324,8 +337,7 @@ void VulkanEngine::draw()
RGImageHandle hGBufferAlbedo = _renderGraph->import_gbuffer_albedo();
RGImageHandle hSwapchain = _renderGraph->import_swapchain_image(swapchainImageIndex);
// Create a transient shadow depth target (fixed resolution for now)
// Create transient depth targets for cascaded shadow maps
// Create transient depth targets for cascaded shadow maps (even if RT-only, to keep descriptors stable)
const VkExtent2D shadowExtent{2048, 2048};
std::array<RGImageHandle, kShadowCascadeCount> hShadowCascades{};
for (int i = 0; i < kShadowCascadeCount; ++i)
@@ -345,10 +357,13 @@ void VulkanEngine::draw()
{
background->register_graph(_renderGraph.get(), hDraw, hDepth);
}
if (_context->shadowSettings.mode != 2u)
{
if (auto *shadow = _renderPassManager->getPass<ShadowPass>())
{
shadow->register_graph(_renderGraph.get(), std::span<RGImageHandle>(hShadowCascades.data(), hShadowCascades.size()), shadowExtent);
}
}
if (auto *geometry = _renderPassManager->getPass<GeometryPass>())
{
geometry->register_graph(_renderGraph.get(), hGBufferPosition, hGBufferNormal, hGBufferAlbedo, hDepth);
@@ -508,6 +523,50 @@ void VulkanEngine::run()
ImGui::End();
}
// Shadows / Ray Query settings
if (ImGui::Begin("Shadows"))
{
const bool rq = _deviceManager->supportsRayQuery();
const bool as = _deviceManager->supportsAccelerationStructure();
ImGui::Text("RayQuery: %s", rq ? "supported" : "not available");
ImGui::Text("AccelStruct: %s", as ? "supported" : "not available");
ImGui::Separator();
auto &ss = _context->shadowSettings;
// Mode selection
int mode = static_cast<int>(ss.mode);
ImGui::TextUnformatted("Shadow Mode");
ImGui::RadioButton("Clipmap only", &mode, 0); ImGui::SameLine();
ImGui::RadioButton("Clipmap + RT", &mode, 1); ImGui::SameLine();
ImGui::RadioButton("RT only", &mode, 2);
// If device lacks RT support, force mode 0
if (!(rq && as) && mode != 0) mode = 0;
ss.mode = static_cast<uint32_t>(mode);
ss.hybridRayQueryEnabled = (ss.mode != 0);
// Hybrid controls (mode 1)
ImGui::BeginDisabled(ss.mode != 1u);
ImGui::TextUnformatted("Cascades using ray assist:");
for (int i = 0; i < 4; ++i)
{
bool on = (ss.hybridRayCascadesMask >> i) & 1u;
std::string label = std::string("C") + std::to_string(i);
if (ImGui::Checkbox(label.c_str(), &on))
{
if (on) ss.hybridRayCascadesMask |= (1u << i);
else ss.hybridRayCascadesMask &= ~(1u << i);
}
if (i != 3) ImGui::SameLine();
}
ImGui::SliderFloat("N·L threshold", &ss.hybridRayNoLThreshold, 0.0f, 1.0f, "%.2f");
ImGui::EndDisabled();
ImGui::Separator();
ImGui::TextWrapped("Clipmap only: raster PCF+RPDB. Clipmap+RT: PCF assisted by ray query at low N·L. RT only: skip shadow maps and use ray tests only.");
ImGui::End();
}
// Render Graph debug window
if (ImGui::Begin("Render Graph"))
{
@@ -719,6 +778,7 @@ void VulkanEngine::init_frame_resources()
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3},
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 4},
{VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1},
};
for (int i = 0; i < FRAME_OVERLAP; i++)

View File

@@ -30,6 +30,7 @@
#include "core/vk_pipeline_manager.h"
#include "core/asset_manager.h"
#include "render/rg_graph.h"
#include "core/vk_raytracing.h"
constexpr unsigned int FRAME_OVERLAP = 2;
@@ -63,6 +64,7 @@ public:
std::unique_ptr<PipelineManager> _pipelineManager;
std::unique_ptr<AssetManager> _assetManager;
std::unique_ptr<RenderGraph> _renderGraph;
std::unique_ptr<RayTracingManager> _rayManager;
struct SDL_Window *_window{nullptr};
@@ -86,8 +88,6 @@ public:
VkPipelineLayout _meshPipelineLayout;
VkPipeline _meshPipeline;
GPUMeshBuffers rectangle;
std::shared_ptr<MeshAsset> cubeMesh;
std::shared_ptr<MeshAsset> sphereMesh;

309
src/core/vk_raytracing.cpp Normal file
View File

@@ -0,0 +1,309 @@
#include "vk_raytracing.h"
#include "vk_device.h"
#include "vk_resource.h"
#include "vk_initializers.h"
#include "scene/vk_loader.h"
#include "scene/vk_scene.h"
#include <cstring>
void RayTracingManager::init(DeviceManager *dev, ResourceManager *res)
{
_device = dev;
_resources = res;
// resolve function pointers
_vkCreateAccelerationStructureKHR = reinterpret_cast<PFN_vkCreateAccelerationStructureKHR>(
vkGetDeviceProcAddr(_device->device(), "vkCreateAccelerationStructureKHR"));
_vkDestroyAccelerationStructureKHR = reinterpret_cast<PFN_vkDestroyAccelerationStructureKHR>(
vkGetDeviceProcAddr(_device->device(), "vkDestroyAccelerationStructureKHR"));
_vkGetAccelerationStructureBuildSizesKHR = reinterpret_cast<PFN_vkGetAccelerationStructureBuildSizesKHR>(
vkGetDeviceProcAddr(_device->device(), "vkGetAccelerationStructureBuildSizesKHR"));
_vkCmdBuildAccelerationStructuresKHR = reinterpret_cast<PFN_vkCmdBuildAccelerationStructuresKHR>(
vkGetDeviceProcAddr(_device->device(), "vkCmdBuildAccelerationStructuresKHR"));
_vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(
vkGetDeviceProcAddr(_device->device(), "vkGetAccelerationStructureDeviceAddressKHR"));
}
void RayTracingManager::cleanup()
{
VkDevice dv = _device->device();
if (_tlas.handle)
{
_vkDestroyAccelerationStructureKHR(dv, _tlas.handle, nullptr);
_tlas.handle = VK_NULL_HANDLE;
}
if (_tlas.storage.buffer)
{
_resources->destroy_buffer(_tlas.storage);
_tlas.storage = {};
}
if (_tlasInstanceBuffer.buffer)
{
_resources->destroy_buffer(_tlasInstanceBuffer);
_tlasInstanceBuffer = {};
_tlasInstanceCapacity = 0;
}
for (auto &kv: _blasByVB)
{
if (kv.second.handle)
{
_vkDestroyAccelerationStructureKHR(dv, kv.second.handle, nullptr);
}
if (kv.second.storage.buffer)
{
_resources->destroy_buffer(kv.second.storage);
}
}
_blasByVB.clear();
}
static VkDeviceAddress get_buffer_address(VkDevice dev, VkBuffer buf)
{
VkBufferDeviceAddressInfo info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
info.buffer = buf;
return vkGetBufferDeviceAddress(dev, &info);
}
AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<MeshAsset> &mesh)
{
if (!mesh) return {};
VkBuffer vb = mesh->meshBuffers.vertexBuffer.buffer;
if (auto it = _blasByVB.find(vb); it != _blasByVB.end())
{
return it->second;
}
// Build BLAS with one geometry per surface
std::vector<VkAccelerationStructureGeometryKHR> geoms;
std::vector<VkAccelerationStructureBuildRangeInfoKHR> ranges;
geoms.reserve(mesh->surfaces.size());
ranges.reserve(mesh->surfaces.size());
VkDeviceAddress vaddr = mesh->meshBuffers.vertexBufferAddress;
VkDeviceAddress iaddr = mesh->meshBuffers.indexBufferAddress;
const uint32_t vcount = mesh->meshBuffers.vertexCount;
for (const auto &s: mesh->surfaces)
{
VkAccelerationStructureGeometryTrianglesDataKHR tri{
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR
};
tri.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT;
tri.vertexData.deviceAddress = vaddr;
tri.vertexStride = sizeof(Vertex);
tri.maxVertex = vcount ? (vcount - 1) : 0; // conservative
tri.indexType = VK_INDEX_TYPE_UINT32;
tri.indexData.deviceAddress = iaddr + static_cast<VkDeviceAddress>(s.startIndex) * sizeof(uint32_t);
tri.transformData.deviceAddress = 0; // identity
VkAccelerationStructureGeometryKHR g{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
g.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
g.flags = VK_GEOMETRY_OPAQUE_BIT_KHR;
g.geometry.triangles = tri;
geoms.push_back(g);
VkAccelerationStructureBuildRangeInfoKHR r{};
r.primitiveCount = s.count / 3;
r.primitiveOffset = 0; // encoded through indexData deviceAddress
r.firstVertex = 0;
r.transformOffset = 0;
ranges.push_back(r);
}
VkAccelerationStructureBuildGeometryInfoKHR buildInfo{
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR
};
buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
buildInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;
buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
buildInfo.geometryCount = static_cast<uint32_t>(geoms.size());
buildInfo.pGeometries = geoms.data();
std::vector<uint32_t> maxPrim(geoms.size());
for (size_t i = 0; i < ranges.size(); ++i) maxPrim[i] = ranges[i].primitiveCount;
VkAccelerationStructureBuildSizesInfoKHR sizes{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
_vkGetAccelerationStructureBuildSizesKHR(_device->device(), VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
&buildInfo, maxPrim.data(), &sizes);
// allocate AS storage and scratch
AccelStructureHandle blas{};
blas.storage = _resources->create_buffer(sizes.accelerationStructureSize,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VMA_MEMORY_USAGE_GPU_ONLY);
VkAccelerationStructureCreateInfoKHR asci{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
asci.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
asci.buffer = blas.storage.buffer;
asci.size = sizes.accelerationStructureSize;
VK_CHECK(_vkCreateAccelerationStructureKHR(_device->device(), &asci, nullptr, &blas.handle));
AllocatedBuffer scratch = _resources->create_buffer(sizes.buildScratchSize,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VMA_MEMORY_USAGE_GPU_ONLY);
VkDeviceAddress scratchAddr = get_buffer_address(_device->device(), scratch.buffer);
buildInfo.dstAccelerationStructure = blas.handle;
buildInfo.scratchData.deviceAddress = scratchAddr;
// build with immediate submit
std::vector<const VkAccelerationStructureBuildRangeInfoKHR *> pRanges(geoms.size());
for (size_t i = 0; i < geoms.size(); ++i) pRanges[i] = &ranges[i];
_resources->immediate_submit([&](VkCommandBuffer cmd) {
_vkCmdBuildAccelerationStructuresKHR(cmd, 1, &buildInfo, pRanges.data());
});
// destroy scratch
_resources->destroy_buffer(scratch);
// device address
VkAccelerationStructureDeviceAddressInfoKHR dai{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR};
dai.accelerationStructure = blas.handle;
blas.deviceAddress = _vkGetAccelerationStructureDeviceAddressKHR(_device->device(), &dai);
_blasByVB.emplace(vb, blas);
return blas;
}
void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize /*requiredScratch*/)
{
// Simple approach: always destroy and recreate the TLAS storage at the required size (no size check)
if (_tlas.handle)
{
_vkDestroyAccelerationStructureKHR(_device->device(), _tlas.handle, nullptr);
_tlas.handle = VK_NULL_HANDLE;
}
if (_tlas.storage.buffer)
{
_resources->destroy_buffer(_tlas.storage);
_tlas.storage = {};
}
_tlas.storage = _resources->create_buffer(requiredASSize,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VMA_MEMORY_USAGE_GPU_ONLY);
VkAccelerationStructureCreateInfoKHR asci{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
asci.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
asci.buffer = _tlas.storage.buffer;
asci.size = requiredASSize;
VK_CHECK(_vkCreateAccelerationStructureKHR(_device->device(), &asci, nullptr, &_tlas.handle));
}
VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const DrawContext &dc)
{
// Collect instances; one per render object (opaque only).
std::vector<VkAccelerationStructureInstanceKHR> instances;
instances.reserve(dc.OpaqueSurfaces.size());
for (const auto &r: dc.OpaqueSurfaces)
{
// Find mesh BLAS by vertex buffer
AccelStructureHandle blas{};
// No MeshAsset pointer is available here; the BLAS cache is keyed by vertex buffer handle, so skip if none was built
auto it = _blasByVB.find(r.vertexBuffer);
if (it == _blasByVB.end())
{
// Can't build BLAS on the fly without mesh topology; skip this instance
continue;
}
blas = it->second;
VkAccelerationStructureInstanceKHR inst{};
// Fill 3x4 row-major from GLM column-major mat4
const glm::mat4 &m = r.transform;
for (int row = 0; row < 3; ++row)
for (int col = 0; col < 4; ++col)
inst.transform.matrix[row][col] = m[col][row];
inst.instanceCustomIndex = 0;
inst.mask = 0xFF;
inst.instanceShaderBindingTableRecordOffset = 0;
inst.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; // two-sided
inst.accelerationStructureReference = blas.deviceAddress;
instances.push_back(inst);
}
if (instances.empty())
{
// nothing to build
return _tlas.handle;
}
// Ensure instance buffer capacity
if (instances.size() > _tlasInstanceCapacity)
{
if (_tlasInstanceBuffer.buffer)
{
_resources->destroy_buffer(_tlasInstanceBuffer);
}
_tlasInstanceCapacity = instances.size();
_tlasInstanceBuffer = _resources->create_buffer(
_tlasInstanceCapacity * sizeof(VkAccelerationStructureInstanceKHR),
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VMA_MEMORY_USAGE_CPU_TO_GPU);
}
// upload instances
{
VmaAllocationInfo ai{};
vmaGetAllocationInfo(_device->allocator(), _tlasInstanceBuffer.allocation, &ai);
std::memcpy(ai.pMappedData, instances.data(), instances.size() * sizeof(instances[0]));
vmaFlushAllocation(_device->allocator(), _tlasInstanceBuffer.allocation, 0, VK_WHOLE_SIZE);
}
VkDeviceAddress instAddr = get_buffer_address(_device->device(), _tlasInstanceBuffer.buffer);
VkAccelerationStructureGeometryInstancesDataKHR instData{
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR
};
instData.arrayOfPointers = VK_FALSE;
instData.data.deviceAddress = instAddr;
VkAccelerationStructureGeometryKHR geom{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
geom.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
geom.geometry.instances = instData;
VkAccelerationStructureBuildGeometryInfoKHR buildInfo{
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR
};
buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
buildInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;
buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
buildInfo.geometryCount = 1;
buildInfo.pGeometries = &geom;
uint32_t primCount = static_cast<uint32_t>(instances.size());
VkAccelerationStructureBuildSizesInfoKHR sizes{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
_vkGetAccelerationStructureBuildSizesKHR(_device->device(), VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
&buildInfo, &primCount, &sizes);
ensure_tlas_storage(sizes.accelerationStructureSize, sizes.buildScratchSize);
buildInfo.dstAccelerationStructure = _tlas.handle;
AllocatedBuffer scratch = _resources->create_buffer(sizes.buildScratchSize,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VMA_MEMORY_USAGE_GPU_ONLY);
VkDeviceAddress scratchAddr = get_buffer_address(_device->device(), scratch.buffer);
buildInfo.scratchData.deviceAddress = scratchAddr;
VkAccelerationStructureBuildRangeInfoKHR range{};
range.primitiveCount = primCount;
const VkAccelerationStructureBuildRangeInfoKHR *pRange = &range;
_resources->immediate_submit([&](VkCommandBuffer cmd) {
_vkCmdBuildAccelerationStructuresKHR(cmd, 1, &buildInfo, &pRange);
});
_resources->destroy_buffer(scratch);
VkAccelerationStructureDeviceAddressInfoKHR dai{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR};
dai.accelerationStructure = _tlas.handle;
_tlas.deviceAddress = _vkGetAccelerationStructureDeviceAddressKHR(_device->device(), &dai);
return _tlas.handle;
}
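
The transform packing in buildTLASFromDrawContext is the usual GLM-to-VkTransformMatrixKHR transpose. The snippet below is a small standalone sanity check of that indexing (illustration only, not part of the commit): a translation by (1, 2, 3) must land in the last column of the 3x4 row-major matrix.

#include <cassert>
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <vulkan/vulkan.h>

int main()
{
    glm::mat4 m = glm::translate(glm::mat4(1.0f), glm::vec3(1.0f, 2.0f, 3.0f));
    VkTransformMatrixKHR t{};
    for (int row = 0; row < 3; ++row)
        for (int col = 0; col < 4; ++col)
            t.matrix[row][col] = m[col][row]; // GLM stores columns first: m[col][row]
    assert(t.matrix[0][3] == 1.0f && t.matrix[1][3] == 2.0f && t.matrix[2][3] == 3.0f);
    return 0;
}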

52
src/core/vk_raytracing.h Normal file
View File

@@ -0,0 +1,52 @@
#pragma once
#include <core/vk_types.h>
#include <unordered_map>
#include <vector>
#include <memory>
class DeviceManager;
class ResourceManager;
struct DrawContext;
struct MeshAsset;
struct AccelStructureHandle {
VkAccelerationStructureKHR handle{VK_NULL_HANDLE};
AllocatedBuffer storage{}; // buffer that backs the AS
VkDeviceAddress deviceAddress{0};
};
class RayTracingManager {
public:
void init(DeviceManager* dev, ResourceManager* res);
void cleanup();
// Build (or get) BLAS for a mesh. Safe to call multiple times.
AccelStructureHandle getOrBuildBLAS(const std::shared_ptr<MeshAsset>& mesh);
// Rebuild TLAS from current draw context; returns TLAS handle (or null if unavailable)
VkAccelerationStructureKHR buildTLASFromDrawContext(const DrawContext& dc);
VkAccelerationStructureKHR tlas() const { return _tlas.handle; }
VkDeviceAddress tlasAddress() const { return _tlas.deviceAddress; }
private:
// function pointers (resolved on init)
PFN_vkCreateAccelerationStructureKHR _vkCreateAccelerationStructureKHR{};
PFN_vkDestroyAccelerationStructureKHR _vkDestroyAccelerationStructureKHR{};
PFN_vkGetAccelerationStructureBuildSizesKHR _vkGetAccelerationStructureBuildSizesKHR{};
PFN_vkCmdBuildAccelerationStructuresKHR _vkCmdBuildAccelerationStructuresKHR{};
PFN_vkGetAccelerationStructureDeviceAddressKHR _vkGetAccelerationStructureDeviceAddressKHR{};
DeviceManager* _device{nullptr};
ResourceManager* _resources{nullptr};
// BLAS cache by vertex buffer handle
std::unordered_map<VkBuffer, AccelStructureHandle> _blasByVB;
// TLAS + scratch / instance buffer (rebuilt per frame)
AccelStructureHandle _tlas{};
AllocatedBuffer _tlasInstanceBuffer{};
size_t _tlasInstanceCapacity{0};
void ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize requiredScratch);
};
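
Pieced together from the other hunks in this commit (engine init, AssetManager/loader mesh upload, and VulkanEngine::draw), the intended call order looks roughly like this. Treat it as a sketch: deviceManager, resourceManager, mesh, drawContext and shadowMode stand in for the engine's actual members.

RayTracingManager rt;
rt.init(deviceManager, resourceManager);       // once, after ray-query support is confirmed

rt.getOrBuildBLAS(mesh);                       // at mesh upload; cached per vertex buffer

// per frame, before the lighting pass binds set=0, binding=1
if (shadowMode != 0u)
    rt.buildTLASFromDrawContext(drawContext);  // rebuilds the instance list from opaque draws

rt.cleanup();                                  // at shutdown, while the device is still alive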

View File

@@ -179,8 +179,10 @@ GPUMeshBuffers ResourceManager::uploadMesh(std::span<uint32_t> indices, std::spa
//create vertex buffer
newSurface.vertexBuffer = create_buffer(vertexBufferSize,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
VMA_MEMORY_USAGE_GPU_ONLY);
//find the address of the vertex buffer
@@ -191,8 +193,20 @@ GPUMeshBuffers ResourceManager::uploadMesh(std::span<uint32_t> indices, std::spa
//create index buffer
newSurface.indexBuffer = create_buffer(indexBufferSize,
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
VMA_MEMORY_USAGE_GPU_ONLY);
// index buffer device address (needed for acceleration structure builds)
{
VkBufferDeviceAddressInfo indexAddrInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
indexAddrInfo.buffer = newSurface.indexBuffer.buffer;
newSurface.indexBufferAddress = vkGetBufferDeviceAddress(_deviceManager->device(), &indexAddrInfo);
}
// store counts for AS builds
newSurface.vertexCount = static_cast<uint32_t>(vertices.size());
newSurface.indexCount = static_cast<uint32_t>(indices.size());
AllocatedBuffer staging = create_buffer(vertexBufferSize + indexBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VMA_MEMORY_USAGE_CPU_ONLY);

View File

@@ -78,6 +78,9 @@ struct GPUSceneData {
glm::mat4 lightViewProjCascades[4];
glm::vec4 cascadeSplitsView;
// Hybrid ray-query options (match shaders/input_structures.glsl)
glm::uvec4 rtOptions; // x: enabled (1/0), y: cascade mask, z: shadow mode (0/1/2), w: reserved
glm::vec4 rtParams; // x: N·L threshold, yzw: reserved
};
enum class MaterialPass :uint8_t {
@@ -112,6 +115,9 @@ struct GPUMeshBuffers {
AllocatedBuffer indexBuffer;
AllocatedBuffer vertexBuffer;
VkDeviceAddress vertexBufferAddress;
VkDeviceAddress indexBufferAddress;
uint32_t vertexCount{0};
uint32_t indexCount{0};
};
// push constants for our mesh object draws

View File

@@ -18,6 +18,8 @@
#include "render/rg_graph.h"
#include <array>
#include "vk_raytracing.h"
void LightingPass::init(EngineContext *context)
{
_context = context;
@@ -166,6 +168,15 @@ void LightingPass::draw_lighting(VkCommandBuffer cmd,
deviceManager->device(), descriptorLayouts->gpuSceneDataLayout());
DescriptorWriter writer;
writer.write_buffer(0, gpuSceneDataBuffer.buffer, sizeof(GPUSceneData), 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
// If TLAS available and feature enabled, bind it at (set=0,binding=1)
if (ctxLocal->ray && ctxLocal->getDevice()->supportsAccelerationStructure() && ctxLocal->shadowSettings.mode != 0u)
{
VkAccelerationStructureKHR tlas = ctxLocal->ray->tlas();
if (tlas != VK_NULL_HANDLE)
{
writer.write_acceleration_structure(1, tlas);
}
}
writer.update_set(deviceManager->device(), globalDescriptor);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, _pipeline);

View File

@@ -488,6 +488,10 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
}
newmesh->meshBuffers = engine->_resourceManager->uploadMesh(indices, vertices);
if (engine->_rayManager)
{
engine->_rayManager->getOrBuildBLAS(newmesh);
}
}
//> load_nodes
// load all nodes and their meshes

View File

@@ -171,6 +171,15 @@ void SceneManager::update_scene()
}
}
// Publish shadow/RT settings to SceneData
if (_context)
{
const auto &ss = _context->shadowSettings;
const uint32_t rtEnabled = (ss.mode != 0) ? 1u : 0u;
sceneData.rtOptions = glm::uvec4(rtEnabled, ss.hybridRayCascadesMask, ss.mode, 0u);
sceneData.rtParams = glm::vec4(ss.hybridRayNoLThreshold, 0.0f, 0.0f, 0.0f);
}
auto end = std::chrono::system_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
stats.scene_update_time = elapsed.count() / 1000.f;