ADD: Ray tracing shadow with Clipmap shadow blending
This commit is contained in:
@@ -35,7 +35,7 @@ foreach(GLSL ${GLSL_SOURCE_FILES})
|
|||||||
message(STATUS ${GLSL})
|
message(STATUS ${GLSL})
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${SPIRV}
|
OUTPUT ${SPIRV}
|
||||||
COMMAND ${GLSL_VALIDATOR} -V ${GLSL} -o ${SPIRV}
|
COMMAND ${GLSL_VALIDATOR} -V --target-env vulkan1.2 ${GLSL} -o ${SPIRV}
|
||||||
DEPENDS ${GLSL})
|
DEPENDS ${GLSL})
|
||||||
list(APPEND SPIRV_BINARY_FILES ${SPIRV})
|
list(APPEND SPIRV_BINARY_FILES ${SPIRV})
|
||||||
endforeach(GLSL)
|
endforeach(GLSL)
|
||||||
|
|||||||
@@ -1,3 +1,9 @@
|
|||||||
Get-ChildItem -Path "shaders" -Include *.frag,*.vert,*.comp,*.geom,*.tesc,*.tese,*.mesh,*.task,*.rgen,*.rint,*.rahit,*.rchit,*.rmiss,*.rcall -Recurse | ForEach-Object {
|
$COMMON = @("--target-env=vulkan1.3", "-O", "-g", "-Werror", "-I", "shaders")
|
||||||
glslc $_.FullName -o "$($_.FullName).spv"
|
|
||||||
}
|
Get-ChildItem -Path "shaders" -Include *.frag,*.vert,*.comp,*.geom,*.tesc,*.tese,*.mesh,*.task,*.rgen,*.rint,*.rahit,*.rchit,*.rmiss,*.rcall -Recurse |
|
||||||
|
ForEach-Object {
|
||||||
|
$extra = @()
|
||||||
|
if ($_.Extension -eq ".mesh") { $extra += "-fshader-stage=mesh" }
|
||||||
|
elseif ($_.Extension -eq ".task") { $extra += "-fshader-stage=task" }
|
||||||
|
glslc $_.FullName @COMMON @extra -o "$($_.FullName).spv"
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#version 450
|
#version 460
|
||||||
#extension GL_GOOGLE_include_directive : require
|
#extension GL_GOOGLE_include_directive : require
|
||||||
|
#extension GL_EXT_ray_query : require
|
||||||
#include "input_structures.glsl"
|
#include "input_structures.glsl"
|
||||||
|
|
||||||
layout(location=0) in vec2 inUV;
|
layout(location=0) in vec2 inUV;
|
||||||
@@ -9,6 +10,10 @@ layout(set=1, binding=0) uniform sampler2D posTex;
|
|||||||
layout(set=1, binding=1) uniform sampler2D normalTex;
|
layout(set=1, binding=1) uniform sampler2D normalTex;
|
||||||
layout(set=1, binding=2) uniform sampler2D albedoTex;
|
layout(set=1, binding=2) uniform sampler2D albedoTex;
|
||||||
layout(set=2, binding=0) uniform sampler2D shadowTex[4];
|
layout(set=2, binding=0) uniform sampler2D shadowTex[4];
|
||||||
|
// TLAS for ray query (traced when sceneData.rtOptions.x == 1 for hybrid assist, or sceneData.rtOptions.z == 2 for RT-only)
|
||||||
|
#ifdef GL_EXT_ray_query
|
||||||
|
layout(set=0, binding=1) uniform accelerationStructureEXT topLevelAS;
|
||||||
|
#endif
|
||||||
|
|
||||||
// Tunables for shadow quality and blending
|
// Tunables for shadow quality and blending
|
||||||
// Border smoothing width in light-space NDC (0..1). Larger = wider cross-fade.
|
// Border smoothing width in light-space NDC (0..1). Larger = wider cross-fade.
|
||||||
@@ -23,6 +28,9 @@ const float SHADOW_NORMAL_OFFSET = 0.0025;
|
|||||||
const float SHADOW_RPDB_SCALE = 1.0;
|
const float SHADOW_RPDB_SCALE = 1.0;
|
||||||
// Minimum clamp to keep a tiny bias even on perpendicular receivers
|
// Minimum clamp to keep a tiny bias even on perpendicular receivers
|
||||||
const float SHADOW_MIN_BIAS = 1e-5;
|
const float SHADOW_MIN_BIAS = 1e-5;
|
||||||
|
// Ray query safety params
|
||||||
|
const float SHADOW_RAY_TMIN = 0.02; // start a bit away from the surface
|
||||||
|
const float SHADOW_RAY_ORIGIN_BIAS = 0.01; // world units
|
||||||
|
|
||||||
const float PI = 3.14159265359;
|
const float PI = 3.14159265359;
|
||||||
|
|
||||||
@@ -182,13 +190,76 @@ float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L)
|
|||||||
{
|
{
|
||||||
vec3 wp = worldPos + N * SHADOW_NORMAL_OFFSET * (0.5 + 0.5 * (1.0 - max(dot(N, L), 0.0)));
|
vec3 wp = worldPos + N * SHADOW_NORMAL_OFFSET * (0.5 + 0.5 * (1.0 - max(dot(N, L), 0.0)));
|
||||||
|
|
||||||
|
// RT-only mode: cast a ray and skip clipmap sampling entirely
|
||||||
|
if (sceneData.rtOptions.z == 2u) {
|
||||||
|
#ifdef GL_EXT_ray_query
|
||||||
|
float farR = max(max(sceneData.cascadeSplitsView.x, sceneData.cascadeSplitsView.y),
|
||||||
|
max(sceneData.cascadeSplitsView.z, sceneData.cascadeSplitsView.w));
|
||||||
|
rayQueryEXT rq;
|
||||||
|
rayQueryInitializeEXT(rq, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsOpaqueEXT,
|
||||||
|
0xFF, wp + N * SHADOW_RAY_ORIGIN_BIAS, SHADOW_RAY_TMIN, L, farR);
|
||||||
|
while (rayQueryProceedEXT(rq)) { }
|
||||||
|
bool hit = (rayQueryGetIntersectionTypeEXT(rq, true) != gl_RayQueryCommittedIntersectionNoneEXT);
|
||||||
|
return hit ? 0.0 : 1.0;
|
||||||
|
#else
|
||||||
|
// Fallback to clipmap PCF if ray query is not available at compile time
|
||||||
|
;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
CascadeMix cm = computeCascadeMix(wp);
|
CascadeMix cm = computeCascadeMix(wp);
|
||||||
float v0 = sampleCascadeShadow(cm.i0, wp, N, L);
|
float v0 = sampleCascadeShadow(cm.i0, wp, N, L);
|
||||||
if (cm.w1 <= 0.0)
|
if (cm.w1 <= 0.0)
|
||||||
|
{
|
||||||
|
// Hybrid ray query assist (terminate-on-first-hit ray cast along L)
|
||||||
|
#ifdef GL_EXT_ray_query
|
||||||
|
if (sceneData.rtOptions.x == 1u)
|
||||||
|
{
|
||||||
|
float NoL = max(dot(N, L), 0.0);
|
||||||
|
uint mask = sceneData.rtOptions.y;
|
||||||
|
bool cascadeEnabled = ((mask >> cm.i0) & 1u) == 1u;
|
||||||
|
if (cascadeEnabled && NoL < sceneData.rtParams.x)
|
||||||
|
{
|
||||||
|
float maxT = sceneData.cascadeSplitsView[cm.i0];
|
||||||
|
rayQueryEXT rq;
|
||||||
|
// tmin: small offset to avoid self-hits
|
||||||
|
rayQueryInitializeEXT(rq, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsOpaqueEXT,
|
||||||
|
0xFF, wp + N * SHADOW_RAY_ORIGIN_BIAS, SHADOW_RAY_TMIN, L, maxT);
|
||||||
|
bool hit = false;
|
||||||
|
while (rayQueryProceedEXT(rq)) { }
|
||||||
|
hit = (rayQueryGetIntersectionTypeEXT(rq, true) != gl_RayQueryCommittedIntersectionNoneEXT);
|
||||||
|
if (hit) v0 = min(v0, 0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return v0;
|
return v0;
|
||||||
|
}
|
||||||
|
|
||||||
float v1 = sampleCascadeShadow(cm.i1, wp, N, L);
|
float v1 = sampleCascadeShadow(cm.i1, wp, N, L);
|
||||||
return mix(v0, v1, clamp(cm.w1, 0.0, 1.0));
|
float vis = mix(v0, v1, clamp(cm.w1, 0.0, 1.0));
|
||||||
|
// Hybrid assist across blended border: take min if a ray hits in either cascade
|
||||||
|
#ifdef GL_EXT_ray_query
|
||||||
|
if (sceneData.rtOptions.x == 1u)
|
||||||
|
{
|
||||||
|
float NoL = max(dot(N, L), 0.0);
|
||||||
|
uint mask = sceneData.rtOptions.y;
|
||||||
|
bool e0 = ((mask >> cm.i0) & 1u) == 1u;
|
||||||
|
bool e1 = ((mask >> cm.i1) & 1u) == 1u;
|
||||||
|
if (NoL < sceneData.rtParams.x && (e0 || e1))
|
||||||
|
{
|
||||||
|
float maxT0 = sceneData.cascadeSplitsView[cm.i0];
|
||||||
|
float maxT1 = sceneData.cascadeSplitsView[cm.i1];
|
||||||
|
float maxT = max(maxT0, maxT1);
|
||||||
|
rayQueryEXT rq;
|
||||||
|
rayQueryInitializeEXT(rq, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsOpaqueEXT,
|
||||||
|
0xFF, wp + N * SHADOW_RAY_ORIGIN_BIAS, SHADOW_RAY_TMIN, L, maxT);
|
||||||
|
while (rayQueryProceedEXT(rq)) { }
|
||||||
|
bool hit = (rayQueryGetIntersectionTypeEXT(rq, true) != gl_RayQueryCommittedIntersectionNoneEXT);
|
||||||
|
if (hit) vis = min(vis, 0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return vis;
|
||||||
}
|
}
|
||||||
|
|
||||||
vec3 fresnelSchlick(float cosTheta, vec3 F0)
|
vec3 fresnelSchlick(float cosTheta, vec3 F0)
|
||||||
|
|||||||
@@ -16,6 +16,12 @@ layout(set = 0, binding = 0) uniform SceneData{
|
|||||||
mat4 lightViewProjCascades[4];
|
mat4 lightViewProjCascades[4];
|
||||||
// View-space split distances for selecting cascades (x,y,z,w)
|
// View-space split distances for selecting cascades (x,y,z,w)
|
||||||
vec4 cascadeSplitsView;
|
vec4 cascadeSplitsView;
|
||||||
|
// Ray-query settings (packed)
|
||||||
|
// rtOptions.x = enabled (1/0)
|
||||||
|
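// rtOptions.y = cascade bitmask (bit i => cascade i assisted)
// rtOptions.z = shadow mode; the lighting shader treats 2 as RT-only (presumably mirrors ShadowSettings::mode - confirm)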
|
||||||
|
uvec4 rtOptions;
|
||||||
|
// rtParams.x = N·L threshold; others reserved
|
||||||
|
vec4 rtParams;
|
||||||
} sceneData;
|
} sceneData;
|
||||||
|
|
||||||
layout(set = 1, binding = 0) uniform GLTFMaterialData{
|
layout(set = 1, binding = 0) uniform GLTFMaterialData{
|
||||||
|
|||||||
@@ -35,6 +35,8 @@ add_executable (vulkan_engine
|
|||||||
core/config.h
|
core/config.h
|
||||||
core/vk_engine.h
|
core/vk_engine.h
|
||||||
core/vk_engine.cpp
|
core/vk_engine.cpp
|
||||||
|
core/vk_raytracing.h
|
||||||
|
core/vk_raytracing.cpp
|
||||||
# render
|
# render
|
||||||
render/vk_pipelines.h
|
render/vk_pipelines.h
|
||||||
render/vk_pipelines.cpp
|
render/vk_pipelines.cpp
|
||||||
|
|||||||
@@ -282,6 +282,11 @@ std::shared_ptr<MeshAsset> AssetManager::createMesh(const std::string &name,
|
|||||||
auto mesh = std::make_shared<MeshAsset>();
|
auto mesh = std::make_shared<MeshAsset>();
|
||||||
mesh->name = name;
|
mesh->name = name;
|
||||||
mesh->meshBuffers = _engine->_resourceManager->uploadMesh(indices, vertices);
|
mesh->meshBuffers = _engine->_resourceManager->uploadMesh(indices, vertices);
|
||||||
|
// Build BLAS for the mesh if ray tracing manager is available
|
||||||
|
if (_engine->_rayManager)
|
||||||
|
{
|
||||||
|
_engine->_rayManager->getOrBuildBLAS(mesh);
|
||||||
|
}
|
||||||
|
|
||||||
GeoSurface surf{};
|
GeoSurface surf{};
|
||||||
surf.startIndex = 0;
|
surf.startIndex = 0;
|
||||||
|
|||||||
@@ -29,6 +29,16 @@ struct FrameResources;
|
|||||||
struct SDL_Window;
|
struct SDL_Window;
|
||||||
class AssetManager;
|
class AssetManager;
|
||||||
class RenderGraph;
|
class RenderGraph;
|
||||||
|
class RayTracingManager;
|
||||||
|
|
||||||
|
// Runtime shadow configuration: selects the shadow technique and holds the
// tunables for the hybrid (shadow map + ray query) path.
// Defaults: mode 0, assist mask 0b1110, N dot L threshold 0.25.
struct ShadowSettings
|
||||||
|
{
|
||||||
|
// 0 = Clipmap only, 1 = Clipmap + RT assist, 2 = RT only
|
||||||
|
uint32_t mode = 0;
|
||||||
|
// Redundant with `mode`; whoever changes `mode` is expected to keep this flag in sync.
bool hybridRayQueryEnabled = false; // derived convenience: (mode != 0)
|
||||||
|
// 0b1110 sets bits 1, 2 and 3, leaving cascade 0 without ray assist by default.
uint32_t hybridRayCascadesMask = 0b1110; // bit i => cascade i uses ray query assist (default: 1..3)
|
||||||
|
float hybridRayNoLThreshold = 0.25f; // trigger when N·L below this (mode==1)
|
||||||
|
};
|
||||||
|
|
||||||
class EngineContext
|
class EngineContext
|
||||||
{
|
{
|
||||||
@@ -60,6 +70,12 @@ public:
|
|||||||
// Assets
|
// Assets
|
||||||
AssetManager* assets = nullptr; // non-owning pointer to central AssetManager
|
AssetManager* assets = nullptr; // non-owning pointer to central AssetManager
|
||||||
|
|
||||||
|
// Runtime settings visible to passes/shaders
|
||||||
|
ShadowSettings shadowSettings{};
|
||||||
|
|
||||||
|
// Ray tracing manager (optional, nullptr if unsupported)
|
||||||
|
RayTracingManager* ray = nullptr;
|
||||||
|
|
||||||
// Accessors
|
// Accessors
|
||||||
DeviceManager *getDevice() const { return device.get(); }
|
DeviceManager *getDevice() const { return device.get(); }
|
||||||
ResourceManager *getResources() const { return resources.get(); }
|
ResourceManager *getResources() const { return resources.get(); }
|
||||||
|
|||||||
@@ -13,6 +13,11 @@ void DescriptorManager::init(DeviceManager *deviceManager)
|
|||||||
} {
|
} {
|
||||||
DescriptorLayoutBuilder builder;
|
DescriptorLayoutBuilder builder;
|
||||||
builder.add_binding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
builder.add_binding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
||||||
|
if (_deviceManager->supportsAccelerationStructure())
|
||||||
|
{
|
||||||
|
// TLAS for ray query (set=0,binding=1)
|
||||||
|
builder.add_binding(1, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
|
||||||
|
}
|
||||||
_gpuSceneDataDescriptorLayout = builder.build(
|
_gpuSceneDataDescriptorLayout = builder.build(
|
||||||
_deviceManager->device(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
|
_deviceManager->device(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,6 +75,21 @@ void DescriptorWriter::write_image(int binding, VkImageView image, VkSampler sam
|
|||||||
writes.push_back(write);
|
writes.push_back(write);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Queues a descriptor write that binds one acceleration structure.
//   binding - destination binding index within the set
//   as      - acceleration structure handle to bind
// The VkWriteDescriptorSetAccelerationStructureKHR is stored in accelInfos and
// chained to the queued VkWriteDescriptorSet through pNext; nothing is sent to
// Vulkan here, the write is only appended to `writes`.
void DescriptorWriter::write_acceleration_structure(int binding, VkAccelerationStructureKHR as)
|
||||||
|
{
|
||||||
|
// Keep the extension struct in accelInfos so the pNext pointer taken below
// refers to storage owned by the writer rather than to a local.
VkWriteDescriptorSetAccelerationStructureKHR &acc = accelInfos.emplace_back(
|
||||||
|
VkWriteDescriptorSetAccelerationStructureKHR{ VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR });
|
||||||
|
acc.accelerationStructureCount = 1;
|
||||||
|
// NOTE(review): `as` is a by-value parameter, so this stores the address of a
// local that no longer exists once this function returns, while `acc` stays in
// accelInfos for later use. The handle likely needs its own storage in the
// writer that lives as long as the queued write - confirm against update_set().
acc.pAccelerationStructures = &as;
|
||||||
|
|
||||||
|
VkWriteDescriptorSet write{ VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET };
|
||||||
|
write.dstBinding = binding;
|
||||||
|
write.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
|
||||||
|
write.descriptorCount = 1;
|
||||||
|
// Acceleration structures are passed through the pNext chain, not through
// pImageInfo / pBufferInfo.
write.pNext = &acc;
|
||||||
|
// dstSet is not assigned here - presumably filled in by update_set(); verify.
writes.push_back(write);
|
||||||
|
}
|
||||||
|
|
||||||
void DescriptorWriter::clear()
|
void DescriptorWriter::clear()
|
||||||
{
|
{
|
||||||
imageInfos.clear();
|
imageInfos.clear();
|
||||||
|
|||||||
@@ -19,12 +19,15 @@ struct DescriptorWriter
|
|||||||
{
|
{
|
||||||
std::deque<VkDescriptorImageInfo> imageInfos;
|
std::deque<VkDescriptorImageInfo> imageInfos;
|
||||||
std::deque<VkDescriptorBufferInfo> bufferInfos;
|
std::deque<VkDescriptorBufferInfo> bufferInfos;
|
||||||
|
std::deque<VkWriteDescriptorSetAccelerationStructureKHR> accelInfos;
|
||||||
std::vector<VkWriteDescriptorSet> writes;
|
std::vector<VkWriteDescriptorSet> writes;
|
||||||
|
|
||||||
void write_image(int binding, VkImageView image, VkSampler sampler, VkImageLayout layout, VkDescriptorType type);
|
void write_image(int binding, VkImageView image, VkSampler sampler, VkImageLayout layout, VkDescriptorType type);
|
||||||
|
|
||||||
void write_buffer(int binding, VkBuffer buffer, size_t size, size_t offset, VkDescriptorType type);
|
void write_buffer(int binding, VkBuffer buffer, size_t size, size_t offset, VkDescriptorType type);
|
||||||
|
|
||||||
|
void write_acceleration_structure(int binding, VkAccelerationStructureKHR as);
|
||||||
|
|
||||||
void clear();
|
void clear();
|
||||||
|
|
||||||
void update_set(VkDevice device, VkDescriptorSet set);
|
void update_set(VkDevice device, VkDescriptorSet set);
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ void DeviceManager::init_vulkan(SDL_Window *window)
|
|||||||
features12.descriptorIndexing = true;
|
features12.descriptorIndexing = true;
|
||||||
|
|
||||||
//use vkbootstrap to select a gpu.
|
//use vkbootstrap to select a gpu.
|
||||||
//We want a gpu that can write to the SDL surface and supports vulkan 1.2
|
//We want a gpu that can write to the SDL surface and supports vulkan 1.3
|
||||||
vkb::PhysicalDeviceSelector selector{vkb_inst};
|
vkb::PhysicalDeviceSelector selector{vkb_inst};
|
||||||
vkb::PhysicalDevice physicalDevice = selector
|
vkb::PhysicalDevice physicalDevice = selector
|
||||||
.set_minimum_version(1, 3)
|
.set_minimum_version(1, 3)
|
||||||
@@ -42,10 +42,44 @@ void DeviceManager::init_vulkan(SDL_Window *window)
|
|||||||
.value();
|
.value();
|
||||||
|
|
||||||
//physicalDevice.features.
|
//physicalDevice.features.
|
||||||
//create the final vulkan device
|
// Enable ray tracing extensions on the physical device if supported (before creating the DeviceBuilder)
|
||||||
|
// Query ray tracing capability on the chosen physical device
|
||||||
|
{
|
||||||
|
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelFeat{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR };
|
||||||
|
VkPhysicalDeviceRayQueryFeaturesKHR rayqFeat{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR,
|
||||||
|
.pNext = &accelFeat };
|
||||||
|
VkPhysicalDeviceFeatures2 feats2{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = &rayqFeat };
|
||||||
|
vkGetPhysicalDeviceFeatures2(physicalDevice.physical_device, &feats2);
|
||||||
|
_rayQuerySupported = (rayqFeat.rayQuery == VK_TRUE);
|
||||||
|
_accelStructSupported = (accelFeat.accelerationStructure == VK_TRUE);
|
||||||
|
fmt::print("[Device] RayQuery support: {} | AccelStruct: {}\n",
|
||||||
|
_rayQuerySupported ? "yes" : "no",
|
||||||
|
_accelStructSupported ? "yes" : "no");
|
||||||
|
|
||||||
|
if (_rayQuerySupported && _accelStructSupported)
|
||||||
|
{
|
||||||
|
physicalDevice.enable_extension_if_present(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
|
||||||
|
physicalDevice.enable_extension_if_present(VK_KHR_RAY_QUERY_EXTENSION_NAME);
|
||||||
|
physicalDevice.enable_extension_if_present(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//create the final vulkan device
|
||||||
vkb::DeviceBuilder deviceBuilder{physicalDevice};
|
vkb::DeviceBuilder deviceBuilder{physicalDevice};
|
||||||
|
|
||||||
|
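// Enable ray query + accel struct features in device create pNext if supported.
// NOTE(review): accelReq and rayqReq are locals of this if-block, but deviceBuilder.build() only runs after the block has ended. If add_pNext stores the pointer rather than copying the struct (vk-bootstrap appears to), the chain dangles at build time; declaring both structs in the enclosing scope would avoid this - confirm.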
|
||||||
|
if (_rayQuerySupported && _accelStructSupported)
|
||||||
|
{
|
||||||
|
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR };
|
||||||
|
accelReq.accelerationStructure = VK_TRUE;
|
||||||
|
VkPhysicalDeviceRayQueryFeaturesKHR rayqReq{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR };
|
||||||
|
rayqReq.pNext = &accelReq;
|
||||||
|
rayqReq.rayQuery = VK_TRUE;
|
||||||
|
deviceBuilder.add_pNext(&rayqReq);
|
||||||
|
}
|
||||||
|
|
||||||
vkb::Device vkbDevice = deviceBuilder.build().value();
|
vkb::Device vkbDevice = deviceBuilder.build().value();
|
||||||
|
|
||||||
// Get the VkDevice handle used in the rest of a vulkan application
|
// Get the VkDevice handle used in the rest of a vulkan application
|
||||||
|
|||||||
@@ -19,6 +19,10 @@ public:
|
|||||||
VmaAllocator allocator() const { return _allocator; }
|
VmaAllocator allocator() const { return _allocator; }
|
||||||
VkDebugUtilsMessengerEXT debugMessenger() { return _debug_messenger; }
|
VkDebugUtilsMessengerEXT debugMessenger() { return _debug_messenger; }
|
||||||
|
|
||||||
|
// Ray tracing capabilities (queried at init; not necessarily enabled)
|
||||||
|
bool supportsRayQuery() const { return _rayQuerySupported; }
|
||||||
|
bool supportsAccelerationStructure() const { return _accelStructSupported; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
VkInstance _instance = nullptr;
|
VkInstance _instance = nullptr;
|
||||||
VkDebugUtilsMessengerEXT _debug_messenger = nullptr;
|
VkDebugUtilsMessengerEXT _debug_messenger = nullptr;
|
||||||
@@ -30,4 +34,8 @@ private:
|
|||||||
VmaAllocator _allocator = nullptr;
|
VmaAllocator _allocator = nullptr;
|
||||||
|
|
||||||
DeletionQueue _deletionQueue;
|
DeletionQueue _deletionQueue;
|
||||||
|
|
||||||
|
// Cached feature support flags
|
||||||
|
bool _rayQuerySupported{false};
|
||||||
|
bool _accelStructSupported{false};
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -100,6 +100,14 @@ void VulkanEngine::init()
|
|||||||
_assetManager->init(this);
|
_assetManager->init(this);
|
||||||
_context->assets = _assetManager.get();
|
_context->assets = _assetManager.get();
|
||||||
|
|
||||||
|
// Optional ray tracing manager if supported and extensions enabled
|
||||||
|
if (_deviceManager->supportsRayQuery() && _deviceManager->supportsAccelerationStructure())
|
||||||
|
{
|
||||||
|
_rayManager = std::make_unique<RayTracingManager>();
|
||||||
|
_rayManager->init(_deviceManager.get(), _resourceManager.get());
|
||||||
|
_context->ray = _rayManager.get();
|
||||||
|
}
|
||||||
|
|
||||||
_sceneManager = std::make_unique<SceneManager>();
|
_sceneManager = std::make_unique<SceneManager>();
|
||||||
_sceneManager->init(_context.get());
|
_sceneManager->init(_context.get());
|
||||||
_context->scene = _sceneManager.get();
|
_context->scene = _sceneManager.get();
|
||||||
@@ -128,7 +136,7 @@ void VulkanEngine::init()
|
|||||||
auto imguiPass = std::make_unique<ImGuiPass>();
|
auto imguiPass = std::make_unique<ImGuiPass>();
|
||||||
_renderPassManager->setImGuiPass(std::move(imguiPass));
|
_renderPassManager->setImGuiPass(std::move(imguiPass));
|
||||||
|
|
||||||
const std::string structurePath = _assetManager->modelPath("resi.glb");
|
const std::string structurePath = _assetManager->modelPath("police_office.glb");
|
||||||
const auto structureFile = _assetManager->loadGLTF(structurePath);
|
const auto structureFile = _assetManager->loadGLTF(structurePath);
|
||||||
|
|
||||||
assert(structureFile.has_value());
|
assert(structureFile.has_value());
|
||||||
@@ -258,6 +266,11 @@ void VulkanEngine::cleanup()
|
|||||||
void VulkanEngine::draw()
|
void VulkanEngine::draw()
|
||||||
{
|
{
|
||||||
_sceneManager->update_scene();
|
_sceneManager->update_scene();
|
||||||
|
// Build or update TLAS for current frame if RT mode enabled (1 or 2)
|
||||||
|
if (_rayManager && _context->shadowSettings.mode != 0u)
|
||||||
|
{
|
||||||
|
_rayManager->buildTLASFromDrawContext(_context->getMainDrawContext());
|
||||||
|
}
|
||||||
//> frame_clear
|
//> frame_clear
|
||||||
//wait until the gpu has finished rendering the last frame. Timeout of 1 second
|
//wait until the gpu has finished rendering the last frame. Timeout of 1 second
|
||||||
VK_CHECK(vkWaitForFences(_deviceManager->device(), 1, &get_current_frame()._renderFence, true, 1000000000));
|
VK_CHECK(vkWaitForFences(_deviceManager->device(), 1, &get_current_frame()._renderFence, true, 1000000000));
|
||||||
@@ -324,8 +337,7 @@ void VulkanEngine::draw()
|
|||||||
RGImageHandle hGBufferAlbedo = _renderGraph->import_gbuffer_albedo();
|
RGImageHandle hGBufferAlbedo = _renderGraph->import_gbuffer_albedo();
|
||||||
RGImageHandle hSwapchain = _renderGraph->import_swapchain_image(swapchainImageIndex);
|
RGImageHandle hSwapchain = _renderGraph->import_swapchain_image(swapchainImageIndex);
|
||||||
|
|
||||||
// Create a transient shadow depth target (fixed resolution for now)
|
// Create transient depth targets for cascaded shadow maps (even if RT-only, to keep descriptors stable)
|
||||||
// Create transient depth targets for cascaded shadow maps
|
|
||||||
const VkExtent2D shadowExtent{2048, 2048};
|
const VkExtent2D shadowExtent{2048, 2048};
|
||||||
std::array<RGImageHandle, kShadowCascadeCount> hShadowCascades{};
|
std::array<RGImageHandle, kShadowCascadeCount> hShadowCascades{};
|
||||||
for (int i = 0; i < kShadowCascadeCount; ++i)
|
for (int i = 0; i < kShadowCascadeCount; ++i)
|
||||||
@@ -345,9 +357,12 @@ void VulkanEngine::draw()
|
|||||||
{
|
{
|
||||||
background->register_graph(_renderGraph.get(), hDraw, hDepth);
|
background->register_graph(_renderGraph.get(), hDraw, hDepth);
|
||||||
}
|
}
|
||||||
if (auto *shadow = _renderPassManager->getPass<ShadowPass>())
|
if (_context->shadowSettings.mode != 2u)
|
||||||
{
|
{
|
||||||
shadow->register_graph(_renderGraph.get(), std::span<RGImageHandle>(hShadowCascades.data(), hShadowCascades.size()), shadowExtent);
|
if (auto *shadow = _renderPassManager->getPass<ShadowPass>())
|
||||||
|
{
|
||||||
|
shadow->register_graph(_renderGraph.get(), std::span<RGImageHandle>(hShadowCascades.data(), hShadowCascades.size()), shadowExtent);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (auto *geometry = _renderPassManager->getPass<GeometryPass>())
|
if (auto *geometry = _renderPassManager->getPass<GeometryPass>())
|
||||||
{
|
{
|
||||||
@@ -508,6 +523,50 @@ void VulkanEngine::run()
|
|||||||
ImGui::End();
|
ImGui::End();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shadows / Ray Query settings
|
||||||
|
if (ImGui::Begin("Shadows"))
|
||||||
|
{
|
||||||
|
const bool rq = _deviceManager->supportsRayQuery();
|
||||||
|
const bool as = _deviceManager->supportsAccelerationStructure();
|
||||||
|
ImGui::Text("RayQuery: %s", rq ? "supported" : "not available");
|
||||||
|
ImGui::Text("AccelStruct: %s", as ? "supported" : "not available");
|
||||||
|
ImGui::Separator();
|
||||||
|
|
||||||
|
auto &ss = _context->shadowSettings;
|
||||||
|
// Mode selection
|
||||||
|
int mode = static_cast<int>(ss.mode);
|
||||||
|
ImGui::TextUnformatted("Shadow Mode");
|
||||||
|
ImGui::RadioButton("Clipmap only", &mode, 0); ImGui::SameLine();
|
||||||
|
ImGui::RadioButton("Clipmap + RT", &mode, 1); ImGui::SameLine();
|
||||||
|
ImGui::RadioButton("RT only", &mode, 2);
|
||||||
|
// If device lacks RT support, force mode 0
|
||||||
|
if (!(rq && as) && mode != 0) mode = 0;
|
||||||
|
ss.mode = static_cast<uint32_t>(mode);
|
||||||
|
ss.hybridRayQueryEnabled = (ss.mode != 0);
|
||||||
|
|
||||||
|
// Hybrid controls (mode 1)
|
||||||
|
ImGui::BeginDisabled(ss.mode != 1u);
|
||||||
|
ImGui::TextUnformatted("Cascades using ray assist:");
|
||||||
|
for (int i = 0; i < 4; ++i)
|
||||||
|
{
|
||||||
|
bool on = (ss.hybridRayCascadesMask >> i) & 1u;
|
||||||
|
std::string label = std::string("C") + std::to_string(i);
|
||||||
|
if (ImGui::Checkbox(label.c_str(), &on))
|
||||||
|
{
|
||||||
|
if (on) ss.hybridRayCascadesMask |= (1u << i);
|
||||||
|
else ss.hybridRayCascadesMask &= ~(1u << i);
|
||||||
|
}
|
||||||
|
if (i != 3) ImGui::SameLine();
|
||||||
|
}
|
||||||
|
ImGui::SliderFloat("N·L threshold", &ss.hybridRayNoLThreshold, 0.0f, 1.0f, "%.2f");
|
||||||
|
ImGui::EndDisabled();
|
||||||
|
|
||||||
|
ImGui::Separator();
|
||||||
|
ImGui::TextWrapped("Clipmap only: raster PCF+RPDB. Clipmap+RT: PCF assisted by ray query at low N·L. RT only: skip shadow maps and use ray tests only.");
|
||||||
|
|
||||||
|
ImGui::End();
|
||||||
|
}
|
||||||
|
|
||||||
// Render Graph debug window
|
// Render Graph debug window
|
||||||
if (ImGui::Begin("Render Graph"))
|
if (ImGui::Begin("Render Graph"))
|
||||||
{
|
{
|
||||||
@@ -660,7 +719,7 @@ void VulkanEngine::run()
|
|||||||
ImGui::End();
|
ImGui::End();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Draw targets window
|
// Draw targets window
|
||||||
if (ImGui::Begin("Targets"))
|
if (ImGui::Begin("Targets"))
|
||||||
{
|
{
|
||||||
ImGui::Text("Draw extent: %ux%u", _drawExtent.width, _drawExtent.height);
|
ImGui::Text("Draw extent: %ux%u", _drawExtent.width, _drawExtent.height);
|
||||||
@@ -719,6 +778,7 @@ void VulkanEngine::init_frame_resources()
|
|||||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3},
|
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3},
|
||||||
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3},
|
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3},
|
||||||
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 4},
|
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 4},
|
||||||
|
{VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1},
|
||||||
};
|
};
|
||||||
|
|
||||||
for (int i = 0; i < FRAME_OVERLAP; i++)
|
for (int i = 0; i < FRAME_OVERLAP; i++)
|
||||||
|
|||||||
@@ -30,6 +30,7 @@
|
|||||||
#include "core/vk_pipeline_manager.h"
|
#include "core/vk_pipeline_manager.h"
|
||||||
#include "core/asset_manager.h"
|
#include "core/asset_manager.h"
|
||||||
#include "render/rg_graph.h"
|
#include "render/rg_graph.h"
|
||||||
|
#include "core/vk_raytracing.h"
|
||||||
|
|
||||||
constexpr unsigned int FRAME_OVERLAP = 2;
|
constexpr unsigned int FRAME_OVERLAP = 2;
|
||||||
|
|
||||||
@@ -63,6 +64,7 @@ public:
|
|||||||
std::unique_ptr<PipelineManager> _pipelineManager;
|
std::unique_ptr<PipelineManager> _pipelineManager;
|
||||||
std::unique_ptr<AssetManager> _assetManager;
|
std::unique_ptr<AssetManager> _assetManager;
|
||||||
std::unique_ptr<RenderGraph> _renderGraph;
|
std::unique_ptr<RenderGraph> _renderGraph;
|
||||||
|
std::unique_ptr<RayTracingManager> _rayManager;
|
||||||
|
|
||||||
struct SDL_Window *_window{nullptr};
|
struct SDL_Window *_window{nullptr};
|
||||||
|
|
||||||
@@ -86,8 +88,6 @@ public:
|
|||||||
VkPipelineLayout _meshPipelineLayout;
|
VkPipelineLayout _meshPipelineLayout;
|
||||||
VkPipeline _meshPipeline;
|
VkPipeline _meshPipeline;
|
||||||
|
|
||||||
GPUMeshBuffers rectangle;
|
|
||||||
|
|
||||||
std::shared_ptr<MeshAsset> cubeMesh;
|
std::shared_ptr<MeshAsset> cubeMesh;
|
||||||
std::shared_ptr<MeshAsset> sphereMesh;
|
std::shared_ptr<MeshAsset> sphereMesh;
|
||||||
|
|
||||||
|
|||||||
309
src/core/vk_raytracing.cpp
Normal file
309
src/core/vk_raytracing.cpp
Normal file
@@ -0,0 +1,309 @@
|
|||||||
|
#include "vk_raytracing.h"
|
||||||
|
#include "vk_device.h"
|
||||||
|
#include "vk_resource.h"
|
||||||
|
#include "vk_initializers.h"
|
||||||
|
#include "scene/vk_loader.h"
|
||||||
|
#include "scene/vk_scene.h"
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
// Stores the device and resource manager pointers and looks up the
// acceleration-structure entry points through vkGetDeviceProcAddr.
//   dev - device manager; dev->device() is used for every lookup below
//   res - resource manager, stored for later use by this manager
// NOTE(review): none of the looked-up pointers is checked for null here -
// presumably this is only called when the ray tracing extensions were enabled
// on the device; verify at the call site.
void RayTracingManager::init(DeviceManager *dev, ResourceManager *res)
|
||||||
|
{
|
||||||
|
_device = dev;
|
||||||
|
_resources = res;
|
||||||
|
// resolve function pointers
|
||||||
|
_vkCreateAccelerationStructureKHR = reinterpret_cast<PFN_vkCreateAccelerationStructureKHR>(
|
||||||
|
vkGetDeviceProcAddr(_device->device(), "vkCreateAccelerationStructureKHR"));
|
||||||
|
_vkDestroyAccelerationStructureKHR = reinterpret_cast<PFN_vkDestroyAccelerationStructureKHR>(
|
||||||
|
vkGetDeviceProcAddr(_device->device(), "vkDestroyAccelerationStructureKHR"));
|
||||||
|
_vkGetAccelerationStructureBuildSizesKHR = reinterpret_cast<PFN_vkGetAccelerationStructureBuildSizesKHR>(
|
||||||
|
vkGetDeviceProcAddr(_device->device(), "vkGetAccelerationStructureBuildSizesKHR"));
|
||||||
|
_vkCmdBuildAccelerationStructuresKHR = reinterpret_cast<PFN_vkCmdBuildAccelerationStructuresKHR>(
|
||||||
|
vkGetDeviceProcAddr(_device->device(), "vkCmdBuildAccelerationStructuresKHR"));
|
||||||
|
_vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast<PFN_vkGetAccelerationStructureDeviceAddressKHR>(
|
||||||
|
vkGetDeviceProcAddr(_device->device(), "vkGetAccelerationStructureDeviceAddressKHR"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Releases everything this manager holds: the TLAS handle and its storage
// buffer, the TLAS instance buffer, and every cached BLAS with its storage.
// Each resource is checked before it is destroyed, and the TLAS members are
// reset afterwards.
// NOTE(review): _device and _resources are dereferenced unconditionally, so
// this assumes init() ran first; GPU idleness is not checked here - confirm
// the caller waits for the device before calling.
void RayTracingManager::cleanup()
|
||||||
|
{
|
||||||
|
VkDevice dv = _device->device();
|
||||||
|
// Top-level acceleration structure handle.
if (_tlas.handle)
|
||||||
|
{
|
||||||
|
_vkDestroyAccelerationStructureKHR(dv, _tlas.handle, nullptr);
|
||||||
|
_tlas.handle = VK_NULL_HANDLE;
|
||||||
|
}
|
||||||
|
// Buffer that holds the TLAS storage.
if (_tlas.storage.buffer)
|
||||||
|
{
|
||||||
|
_resources->destroy_buffer(_tlas.storage);
|
||||||
|
_tlas.storage = {};
|
||||||
|
}
|
||||||
|
// Instance buffer; the capacity counter is reset together with it.
if (_tlasInstanceBuffer.buffer)
|
||||||
|
{
|
||||||
|
_resources->destroy_buffer(_tlasInstanceBuffer);
|
||||||
|
_tlasInstanceBuffer = {};
|
||||||
|
_tlasInstanceCapacity = 0;
|
||||||
|
}
|
||||||
|
// Cached bottom-level structures: destroy each handle and its storage buffer.
for (auto &kv: _blasByVB)
|
||||||
|
{
|
||||||
|
if (kv.second.handle)
|
||||||
|
{
|
||||||
|
_vkDestroyAccelerationStructureKHR(dv, kv.second.handle, nullptr);
|
||||||
|
}
|
||||||
|
if (kv.second.storage.buffer)
|
||||||
|
{
|
||||||
|
_resources->destroy_buffer(kv.second.storage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Drop the now-dangling entries so nothing is destroyed twice.
_blasByVB.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// File-local helper: returns the device address of `buf` on device `dev` by
// filling a VkBufferDeviceAddressInfo and calling vkGetBufferDeviceAddress.
// NOTE(review): presumably the buffer must have been created with the
// shader-device-address usage flag; that is not visible here - confirm.
static VkDeviceAddress get_buffer_address(VkDevice dev, VkBuffer buf)
|
||||||
|
{
|
||||||
|
VkBufferDeviceAddressInfo info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
|
||||||
|
info.buffer = buf;
|
||||||
|
return vkGetBufferDeviceAddress(dev, &info);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Looks up, or builds and caches, the bottom-level acceleration structure of a mesh.
///
/// The cache (_blasByVB) is keyed by the mesh's vertex buffer handle. On a miss, one
/// opaque triangle geometry is created per entry of mesh->surfaces, the BLAS is built
/// through ResourceManager::immediate_submit, and the result is inserted into the cache.
///
/// @param mesh Mesh to build for; a null pointer yields a default-constructed handle.
/// @return A copy of the cached or freshly built AccelStructureHandle.
AccelStructureHandle RayTracingManager::getOrBuildBLAS(const std::shared_ptr<MeshAsset> &mesh)
{
    if (mesh == nullptr)
    {
        return {};
    }

    // Cache hit: the vertex buffer handle identifies the mesh.
    const VkBuffer cacheKey = mesh->meshBuffers.vertexBuffer.buffer;
    const auto cached = _blasByVB.find(cacheKey);
    if (cached != _blasByVB.end())
    {
        return cached->second;
    }

    VkDevice device = _device->device();
    const size_t surfaceCount = mesh->surfaces.size();

    // One geometry, one build range and one max-primitive entry per surface.
    std::vector<VkAccelerationStructureGeometryKHR> geometries;
    std::vector<VkAccelerationStructureBuildRangeInfoKHR> buildRanges;
    std::vector<uint32_t> maxPrimitiveCounts;
    geometries.reserve(surfaceCount);
    buildRanges.reserve(surfaceCount);
    maxPrimitiveCounts.reserve(surfaceCount);

    const VkDeviceAddress vertexBase = mesh->meshBuffers.vertexBufferAddress;
    const VkDeviceAddress indexBase = mesh->meshBuffers.indexBufferAddress;
    const uint32_t vertexCount = mesh->meshBuffers.vertexCount;
    // Conservative upper bound: every surface may reference any vertex of the shared buffer.
    const uint32_t highestVertex = (vertexCount != 0) ? (vertexCount - 1) : 0;

    for (const auto &surface: mesh->surfaces)
    {
        VkAccelerationStructureGeometryKHR geometry{};
        geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
        geometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
        geometry.flags = VK_GEOMETRY_OPAQUE_BIT_KHR;

        VkAccelerationStructureGeometryTrianglesDataKHR &triangles = geometry.geometry.triangles;
        triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
        triangles.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT;
        triangles.vertexData.deviceAddress = vertexBase;
        triangles.vertexStride = sizeof(Vertex);
        triangles.maxVertex = highestVertex;
        triangles.indexType = VK_INDEX_TYPE_UINT32;
        // The surface's first index is folded into the address, so primitiveOffset stays 0 below.
        triangles.indexData.deviceAddress =
            indexBase + static_cast<VkDeviceAddress>(surface.startIndex) * sizeof(uint32_t);
        triangles.transformData.deviceAddress = 0; // identity
        geometries.push_back(geometry);

        VkAccelerationStructureBuildRangeInfoKHR range{}; // offsets and firstVertex remain 0
        range.primitiveCount = surface.count / 3;
        buildRanges.push_back(range);
        maxPrimitiveCounts.push_back(range.primitiveCount);
    }

    VkAccelerationStructureBuildGeometryInfoKHR buildInfo{};
    buildInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
    buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
    buildInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;
    buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
    buildInfo.geometryCount = static_cast<uint32_t>(geometries.size());
    buildInfo.pGeometries = geometries.data();

    VkAccelerationStructureBuildSizesInfoKHR buildSizes{};
    buildSizes.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
    _vkGetAccelerationStructureBuildSizesKHR(device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
                                             &buildInfo, maxPrimitiveCounts.data(), &buildSizes);

    // Backing storage for the acceleration structure, then the structure object itself.
    AccelStructureHandle result{};
    result.storage = _resources->create_buffer(buildSizes.accelerationStructureSize,
                                               VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
                                               VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                               VMA_MEMORY_USAGE_GPU_ONLY);

    VkAccelerationStructureCreateInfoKHR createInfo{};
    createInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR;
    createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
    createInfo.buffer = result.storage.buffer;
    createInfo.size = buildSizes.accelerationStructureSize;
    VK_CHECK(_vkCreateAccelerationStructureKHR(device, &createInfo, nullptr, &result.handle));

    // Temporary scratch memory, released right after the build has been submitted.
    // NOTE(review): confirm the scratch address meets the device's
    // minAccelerationStructureScratchOffsetAlignment; nothing here enforces it.
    AllocatedBuffer scratchBuffer = _resources->create_buffer(buildSizes.buildScratchSize,
                                                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                                              VMA_MEMORY_USAGE_GPU_ONLY);

    buildInfo.dstAccelerationStructure = result.handle;
    buildInfo.scratchData.deviceAddress = get_buffer_address(device, scratchBuffer.buffer);

    // vkCmdBuildAccelerationStructuresKHR takes one range pointer per geometry.
    std::vector<const VkAccelerationStructureBuildRangeInfoKHR *> rangePointers;
    rangePointers.reserve(buildRanges.size());
    for (const auto &range: buildRanges)
    {
        rangePointers.push_back(&range);
    }

    _resources->immediate_submit([&](VkCommandBuffer cmd) {
        _vkCmdBuildAccelerationStructuresKHR(cmd, 1, &buildInfo, rangePointers.data());
    });

    _resources->destroy_buffer(scratchBuffer);

    // Address that TLAS instances use to reference this BLAS.
    VkAccelerationStructureDeviceAddressInfoKHR addressInfo{};
    addressInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR;
    addressInfo.accelerationStructure = result.handle;
    result.deviceAddress = _vkGetAccelerationStructureDeviceAddressKHR(device, &addressInfo);

    _blasByVB.emplace(cacheKey, result);
    return result;
}
|
||||||
|
|
||||||
|
/// Recreates the TLAS object and its backing buffer with the requested size.
///
/// Any existing TLAS handle and storage buffer are destroyed first; a new storage
/// buffer of `requiredASSize` bytes and a new top-level acceleration structure on top
/// of it are then created. This happens on every call, regardless of the previous size.
///
/// @param requiredASSize Size in bytes of the acceleration structure to create.
/// The second parameter (required scratch size) is currently unused.
void RayTracingManager::ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize /*requiredScratch*/)
{
    VkDevice device = _device->device();

    // Drop the previous acceleration structure before its backing buffer.
    if (_tlas.handle != VK_NULL_HANDLE)
    {
        _vkDestroyAccelerationStructureKHR(device, _tlas.handle, nullptr);
        _tlas.handle = VK_NULL_HANDLE;
    }

    if (_tlas.storage.buffer != VK_NULL_HANDLE)
    {
        _resources->destroy_buffer(_tlas.storage);
        _tlas.storage = {};
    }

    const VkBufferUsageFlags storageUsage = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
                                            VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
    _tlas.storage = _resources->create_buffer(requiredASSize, storageUsage, VMA_MEMORY_USAGE_GPU_ONLY);

    VkAccelerationStructureCreateInfoKHR createInfo{};
    createInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR;
    createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
    createInfo.buffer = _tlas.storage.buffer;
    createInfo.size = requiredASSize;
    VK_CHECK(_vkCreateAccelerationStructureKHR(device, &createInfo, nullptr, &_tlas.handle));
}
|
||||||
|
|
||||||
|
/// Rebuilds the top-level acceleration structure from the opaque surfaces of `dc`.
///
/// One instance is emitted per opaque render object whose vertex buffer has an entry in
/// the BLAS cache (_blasByVB); objects without a cached BLAS are skipped. When no
/// instance qualifies, nothing is built and the current TLAS handle is returned as is.
///
/// @param dc Draw context whose OpaqueSurfaces are turned into TLAS instances.
/// @return The TLAS handle after the build, or the previously stored handle (which may
///         be VK_NULL_HANDLE) when there was nothing to build.
VkAccelerationStructureKHR RayTracingManager::buildTLASFromDrawContext(const DrawContext &dc)
{
    // Collect instances; one per render object (opaque only).
    std::vector<VkAccelerationStructureInstanceKHR> instances;
    instances.reserve(dc.OpaqueSurfaces.size());

    for (const auto &r: dc.OpaqueSurfaces)
    {
        // No MeshAsset pointer is available here, so the BLAS is looked up by vertex
        // buffer handle. Without mesh topology it cannot be built on the fly: skip.
        const auto it = _blasByVB.find(r.vertexBuffer);
        if (it == _blasByVB.end())
        {
            continue;
        }
        const AccelStructureHandle &blas = it->second;

        VkAccelerationStructureInstanceKHR inst{};
        // Fill the 3x4 row-major Vulkan matrix from the column-major GLM mat4.
        const glm::mat4 &m = r.transform;
        for (int row = 0; row < 3; ++row)
        {
            for (int col = 0; col < 4; ++col)
            {
                inst.transform.matrix[row][col] = m[col][row];
            }
        }

        inst.instanceCustomIndex = 0;
        inst.mask = 0xFF;
        inst.instanceShaderBindingTableRecordOffset = 0;
        inst.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; // two-sided
        inst.accelerationStructureReference = blas.deviceAddress;
        instances.push_back(inst);
    }

    if (instances.empty())
    {
        // nothing to build
        return _tlas.handle;
    }

    // Ensure instance buffer capacity; the buffer only ever grows.
    if (instances.size() > _tlasInstanceCapacity)
    {
        if (_tlasInstanceBuffer.buffer)
        {
            _resources->destroy_buffer(_tlasInstanceBuffer);
        }
        _tlasInstanceCapacity = instances.size();
        _tlasInstanceBuffer = _resources->create_buffer(
            _tlasInstanceCapacity * sizeof(VkAccelerationStructureInstanceKHR),
            VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR |
            VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
            VK_BUFFER_USAGE_TRANSFER_DST_BIT,
            VMA_MEMORY_USAGE_CPU_TO_GPU);
    }

    // Upload instances through the allocation's mapped pointer.
    // NOTE(review): assumes create_buffer returns a persistently mapped allocation
    // (pMappedData != nullptr) - confirm against ResourceManager::create_buffer.
    {
        VmaAllocationInfo ai{};
        vmaGetAllocationInfo(_device->allocator(), _tlasInstanceBuffer.allocation, &ai);
        std::memcpy(ai.pMappedData, instances.data(), instances.size() * sizeof(instances[0]));
        vmaFlushAllocation(_device->allocator(), _tlasInstanceBuffer.allocation, 0, VK_WHOLE_SIZE);
    }

    VkDeviceAddress instAddr = get_buffer_address(_device->device(), _tlasInstanceBuffer.buffer);

    VkAccelerationStructureGeometryInstancesDataKHR instData{
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR
    };
    instData.arrayOfPointers = VK_FALSE;
    instData.data.deviceAddress = instAddr;

    VkAccelerationStructureGeometryKHR geom{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
    geom.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
    geom.geometry.instances = instData;

    VkAccelerationStructureBuildGeometryInfoKHR buildInfo{
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR
    };
    buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
    buildInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;
    buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
    buildInfo.geometryCount = 1;
    buildInfo.pGeometries = &geom;

    // For a TLAS the "primitive" count is the number of instances.
    uint32_t primCount = static_cast<uint32_t>(instances.size());
    VkAccelerationStructureBuildSizesInfoKHR sizes{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
    _vkGetAccelerationStructureBuildSizesKHR(_device->device(), VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
                                             &buildInfo, &primCount, &sizes);

    // Recreates _tlas.handle and _tlas.storage for the queried size.
    ensure_tlas_storage(sizes.accelerationStructureSize, sizes.buildScratchSize);

    buildInfo.dstAccelerationStructure = _tlas.handle;
    // Temporary scratch memory, released right after the build has been submitted.
    AllocatedBuffer scratch = _resources->create_buffer(sizes.buildScratchSize,
                                                        VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                                        VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                                        VMA_MEMORY_USAGE_GPU_ONLY);
    VkDeviceAddress scratchAddr = get_buffer_address(_device->device(), scratch.buffer);
    buildInfo.scratchData.deviceAddress = scratchAddr;

    VkAccelerationStructureBuildRangeInfoKHR range{};
    range.primitiveCount = primCount;
    // The build command expects an array of range pointers, one per build info.
    const VkAccelerationStructureBuildRangeInfoKHR *pRange = &range;

    _resources->immediate_submit([&](VkCommandBuffer cmd) {
        _vkCmdBuildAccelerationStructuresKHR(cmd, 1, &buildInfo, &pRange);
    });
    _resources->destroy_buffer(scratch);

    VkAccelerationStructureDeviceAddressInfoKHR dai{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR};
    dai.accelerationStructure = _tlas.handle;
    _tlas.deviceAddress = _vkGetAccelerationStructureDeviceAddressKHR(_device->device(), &dai);

    return _tlas.handle;
}
|
||||||
52
src/core/vk_raytracing.h
Normal file
52
src/core/vk_raytracing.h
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <core/vk_types.h>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
class DeviceManager;
|
||||||
|
class ResourceManager;
|
||||||
|
struct DrawContext;
|
||||||
|
struct MeshAsset;
|
||||||
|
|
||||||
|
struct AccelStructureHandle {
|
||||||
|
VkAccelerationStructureKHR handle{VK_NULL_HANDLE};
|
||||||
|
AllocatedBuffer storage{}; // buffer that backs the AS
|
||||||
|
VkDeviceAddress deviceAddress{0};
|
||||||
|
};
|
||||||
|
|
||||||
|
class RayTracingManager {
|
||||||
|
public:
|
||||||
|
void init(DeviceManager* dev, ResourceManager* res);
|
||||||
|
void cleanup();
|
||||||
|
|
||||||
|
// Build (or get) BLAS for a mesh. Safe to call multiple times.
|
||||||
|
AccelStructureHandle getOrBuildBLAS(const std::shared_ptr<MeshAsset>& mesh);
|
||||||
|
|
||||||
|
// Rebuild TLAS from current draw context; returns TLAS handle (or null if unavailable)
|
||||||
|
VkAccelerationStructureKHR buildTLASFromDrawContext(const DrawContext& dc);
|
||||||
|
VkAccelerationStructureKHR tlas() const { return _tlas.handle; }
|
||||||
|
VkDeviceAddress tlasAddress() const { return _tlas.deviceAddress; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// function pointers (resolved on init)
|
||||||
|
PFN_vkCreateAccelerationStructureKHR _vkCreateAccelerationStructureKHR{};
|
||||||
|
PFN_vkDestroyAccelerationStructureKHR _vkDestroyAccelerationStructureKHR{};
|
||||||
|
PFN_vkGetAccelerationStructureBuildSizesKHR _vkGetAccelerationStructureBuildSizesKHR{};
|
||||||
|
PFN_vkCmdBuildAccelerationStructuresKHR _vkCmdBuildAccelerationStructuresKHR{};
|
||||||
|
PFN_vkGetAccelerationStructureDeviceAddressKHR _vkGetAccelerationStructureDeviceAddressKHR{};
|
||||||
|
|
||||||
|
DeviceManager* _device{nullptr};
|
||||||
|
ResourceManager* _resources{nullptr};
|
||||||
|
|
||||||
|
// BLAS cache by vertex buffer handle
|
||||||
|
std::unordered_map<VkBuffer, AccelStructureHandle> _blasByVB;
|
||||||
|
|
||||||
|
// TLAS + scratch / instance buffer (rebuilt per frame)
|
||||||
|
AccelStructureHandle _tlas{};
|
||||||
|
AllocatedBuffer _tlasInstanceBuffer{};
|
||||||
|
size_t _tlasInstanceCapacity{0};
|
||||||
|
|
||||||
|
void ensure_tlas_storage(VkDeviceSize requiredASSize, VkDeviceSize requiredScratch);
|
||||||
|
};
|
||||||
|
|
||||||
@@ -179,8 +179,10 @@ GPUMeshBuffers ResourceManager::uploadMesh(std::span<uint32_t> indices, std::spa
|
|||||||
|
|
||||||
//create vertex buffer
|
//create vertex buffer
|
||||||
newSurface.vertexBuffer = create_buffer(vertexBufferSize,
|
newSurface.vertexBuffer = create_buffer(vertexBufferSize,
|
||||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||||
|
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
|
||||||
|
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
|
||||||
VMA_MEMORY_USAGE_GPU_ONLY);
|
VMA_MEMORY_USAGE_GPU_ONLY);
|
||||||
|
|
||||||
//find the address of the vertex buffer
|
//find the address of the vertex buffer
|
||||||
@@ -191,8 +193,20 @@ GPUMeshBuffers ResourceManager::uploadMesh(std::span<uint32_t> indices, std::spa
|
|||||||
|
|
||||||
//create index buffer
|
//create index buffer
|
||||||
newSurface.indexBuffer = create_buffer(indexBufferSize,
|
newSurface.indexBuffer = create_buffer(indexBufferSize,
|
||||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||||
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||||
|
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
|
||||||
|
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
|
||||||
VMA_MEMORY_USAGE_GPU_ONLY);
|
VMA_MEMORY_USAGE_GPU_ONLY);
|
||||||
|
// index buffer device address (needed for acceleration structure builds)
|
||||||
|
{
|
||||||
|
VkBufferDeviceAddressInfo indexAddrInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
|
||||||
|
indexAddrInfo.buffer = newSurface.indexBuffer.buffer;
|
||||||
|
newSurface.indexBufferAddress = vkGetBufferDeviceAddress(_deviceManager->device(), &indexAddrInfo);
|
||||||
|
}
|
||||||
|
// store counts for AS builds
|
||||||
|
newSurface.vertexCount = static_cast<uint32_t>(vertices.size());
|
||||||
|
newSurface.indexCount = static_cast<uint32_t>(indices.size());
|
||||||
|
|
||||||
AllocatedBuffer staging = create_buffer(vertexBufferSize + indexBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
AllocatedBuffer staging = create_buffer(vertexBufferSize + indexBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||||||
VMA_MEMORY_USAGE_CPU_ONLY);
|
VMA_MEMORY_USAGE_CPU_ONLY);
|
||||||
|
|||||||
@@ -78,6 +78,9 @@ struct GPUSceneData {
|
|||||||
|
|
||||||
glm::mat4 lightViewProjCascades[4];
|
glm::mat4 lightViewProjCascades[4];
|
||||||
glm::vec4 cascadeSplitsView;
|
glm::vec4 cascadeSplitsView;
|
||||||
|
// Hybrid ray-query options (match shaders/input_structures.glsl)
|
||||||
|
glm::uvec4 rtOptions; // x: enabled (1/0), y: cascade mask, z,w: reserved
|
||||||
|
glm::vec4 rtParams; // x: N·L threshold, yzw: reserved
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class MaterialPass :uint8_t {
|
enum class MaterialPass :uint8_t {
|
||||||
@@ -112,6 +115,9 @@ struct GPUMeshBuffers {
|
|||||||
AllocatedBuffer indexBuffer;
|
AllocatedBuffer indexBuffer;
|
||||||
AllocatedBuffer vertexBuffer;
|
AllocatedBuffer vertexBuffer;
|
||||||
VkDeviceAddress vertexBufferAddress;
|
VkDeviceAddress vertexBufferAddress;
|
||||||
|
VkDeviceAddress indexBufferAddress;
|
||||||
|
uint32_t vertexCount{0};
|
||||||
|
uint32_t indexCount{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
// push constants for our mesh object draws
|
// push constants for our mesh object draws
|
||||||
|
|||||||
@@ -18,6 +18,8 @@
|
|||||||
#include "render/rg_graph.h"
|
#include "render/rg_graph.h"
|
||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
|
#include "vk_raytracing.h"
|
||||||
|
|
||||||
void LightingPass::init(EngineContext *context)
|
void LightingPass::init(EngineContext *context)
|
||||||
{
|
{
|
||||||
_context = context;
|
_context = context;
|
||||||
@@ -166,6 +168,15 @@ void LightingPass::draw_lighting(VkCommandBuffer cmd,
|
|||||||
deviceManager->device(), descriptorLayouts->gpuSceneDataLayout());
|
deviceManager->device(), descriptorLayouts->gpuSceneDataLayout());
|
||||||
DescriptorWriter writer;
|
DescriptorWriter writer;
|
||||||
writer.write_buffer(0, gpuSceneDataBuffer.buffer, sizeof(GPUSceneData), 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
writer.write_buffer(0, gpuSceneDataBuffer.buffer, sizeof(GPUSceneData), 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
||||||
|
// If TLAS available and feature enabled, bind it at (set=0,binding=1)
|
||||||
|
if (ctxLocal->ray && ctxLocal->getDevice()->supportsAccelerationStructure() && ctxLocal->shadowSettings.mode != 0u)
|
||||||
|
{
|
||||||
|
VkAccelerationStructureKHR tlas = ctxLocal->ray->tlas();
|
||||||
|
if (tlas != VK_NULL_HANDLE)
|
||||||
|
{
|
||||||
|
writer.write_acceleration_structure(1, tlas);
|
||||||
|
}
|
||||||
|
}
|
||||||
writer.update_set(deviceManager->device(), globalDescriptor);
|
writer.update_set(deviceManager->device(), globalDescriptor);
|
||||||
|
|
||||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, _pipeline);
|
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, _pipeline);
|
||||||
|
|||||||
@@ -488,6 +488,10 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
|
|||||||
}
|
}
|
||||||
|
|
||||||
newmesh->meshBuffers = engine->_resourceManager->uploadMesh(indices, vertices);
|
newmesh->meshBuffers = engine->_resourceManager->uploadMesh(indices, vertices);
|
||||||
|
if (engine->_rayManager)
|
||||||
|
{
|
||||||
|
engine->_rayManager->getOrBuildBLAS(newmesh);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//> load_nodes
|
//> load_nodes
|
||||||
// load all nodes and their meshes
|
// load all nodes and their meshes
|
||||||
|
|||||||
@@ -171,6 +171,15 @@ void SceneManager::update_scene()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Publish shadow/RT settings to SceneData
|
||||||
|
if (_context)
|
||||||
|
{
|
||||||
|
const auto &ss = _context->shadowSettings;
|
||||||
|
const uint32_t rtEnabled = (ss.mode != 0) ? 1u : 0u;
|
||||||
|
sceneData.rtOptions = glm::uvec4(rtEnabled, ss.hybridRayCascadesMask, ss.mode, 0u);
|
||||||
|
sceneData.rtParams = glm::vec4(ss.hybridRayNoLThreshold, 0.0f, 0.0f, 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
auto end = std::chrono::system_clock::now();
|
auto end = std::chrono::system_clock::now();
|
||||||
auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
|
auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
|
||||||
stats.scene_update_time = elapsed.count() / 1000.f;
|
stats.scene_update_time = elapsed.count() / 1000.f;
|
||||||
|
|||||||
Reference in New Issue
Block a user