ADD: Clipmap shadow tuning

This commit is contained in:
2025-10-25 00:54:47 +09:00
parent 6e08297866
commit 3127658b01
5 changed files with 117 additions and 18 deletions

View File

@@ -8,7 +8,6 @@ layout(location=0) out vec4 outColor;
layout(set=1, binding=0) uniform sampler2D posTex;
layout(set=1, binding=1) uniform sampler2D normalTex;
layout(set=1, binding=2) uniform sampler2D albedoTex;
// Mixed near + CSM: shadowTex[0] is the near/simple map, 1..N-1 are cascades
layout(set=2, binding=0) uniform sampler2D shadowTex[4];
const float PI = 3.14159265359;
@@ -42,7 +41,7 @@ uint selectCascadeIndex(vec3 worldPos)
return i;
}
}
return 3u; // fallback to farthest level
return 3u;
}
float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L)
@@ -71,7 +70,7 @@ float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L)
vec2 texelSize = 1.0 / vec2(dim);
float baseRadius = 1.25;
// Slightly increase filter for farther cascades
float radius = mix(baseRadius, baseRadius * 3.0, float(ci) / 3.0);
float ang = hash12(suv * 4096.0) * 6.2831853;

View File

@@ -10,14 +10,9 @@ inline constexpr bool kUseValidationLayers = true;
// Shadow mapping configuration
inline constexpr int kShadowCascadeCount = 4;
// Maximum shadow distance for CSM in view-space units
inline constexpr float kShadowCSMFar = 400.0f;
inline constexpr float kShadowCSMFar = 800.0f;
// Shadow map resolution used for stabilization (texel snapping). Must match actual image size.
inline constexpr float kShadowMapResolution = 2048.0f;
// Extra XY expansion for cascade footprint (safety against FOV/aspect changes)
inline constexpr float kShadowCascadeRadiusScale = 2.5f;
// Additive XY margin in world units (light-space) beyond scaled radius
inline constexpr float kShadowCascadeRadiusMargin = 40.0f;
// Clipmap shadow configuration (used when cascades operate in clipmap mode)
// Base coverage radius of level 0 around the camera (world units). Each level doubles the radius.
inline constexpr float kShadowClipBaseRadius = 20.0f;

View File

@@ -128,7 +128,7 @@ void VulkanEngine::init()
auto imguiPass = std::make_unique<ImGuiPass>();
_renderPassManager->setImGuiPass(std::move(imguiPass));
const std::string structurePath = _assetManager->modelPath("police_office.glb");
const std::string structurePath = _assetManager->modelPath("resi.glb");
const auto structureFile = _assetManager->loadGLTF(structurePath);
assert(structureFile.has_value());
@@ -263,6 +263,11 @@ void VulkanEngine::draw()
VK_CHECK(vkWaitForFences(_deviceManager->device(), 1, &get_current_frame()._renderFence, true, 1000000000));
get_current_frame()._deletionQueue.flush();
// Resolve last frame's pass timings before we clear and rebuild the graph
if (_renderGraph)
{
_renderGraph->resolve_timings();
}
get_current_frame()._frameDescriptors.clear_pools(_deviceManager->device());
//< frame_clear
@@ -515,13 +520,15 @@ void VulkanEngine::run()
ImGui::SameLine();
ImGui::Text("%zu passes", passInfos.size());
if (ImGui::BeginTable("passes", 6, ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingStretchProp))
if (ImGui::BeginTable("passes", 8, ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingStretchProp))
{
ImGui::TableSetupColumn("Enable", ImGuiTableColumnFlags_WidthFixed, 70);
ImGui::TableSetupColumn("Name");
ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, 90);
ImGui::TableSetupColumn("Imgs", ImGuiTableColumnFlags_WidthFixed, 60);
ImGui::TableSetupColumn("Bufs", ImGuiTableColumnFlags_WidthFixed, 60);
ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, 80);
ImGui::TableSetupColumn("GPU ms", ImGuiTableColumnFlags_WidthFixed, 70);
ImGui::TableSetupColumn("CPU rec ms", ImGuiTableColumnFlags_WidthFixed, 90);
ImGui::TableSetupColumn("Imgs", ImGuiTableColumnFlags_WidthFixed, 55);
ImGui::TableSetupColumn("Bufs", ImGuiTableColumnFlags_WidthFixed, 55);
ImGui::TableSetupColumn("Attachments", ImGuiTableColumnFlags_WidthFixed, 100);
ImGui::TableHeadersRow();
@@ -551,10 +558,14 @@ void VulkanEngine::run()
ImGui::TableSetColumnIndex(2);
ImGui::TextUnformatted(typeName(pi.type));
ImGui::TableSetColumnIndex(3);
ImGui::Text("%u/%u", pi.imageReads, pi.imageWrites);
if (pi.gpuMillis >= 0.0f) ImGui::Text("%.2f", pi.gpuMillis); else ImGui::TextUnformatted("-");
ImGui::TableSetColumnIndex(4);
ImGui::Text("%u/%u", pi.bufferReads, pi.bufferWrites);
if (pi.cpuMillis >= 0.0f) ImGui::Text("%.2f", pi.cpuMillis); else ImGui::TextUnformatted("-");
ImGui::TableSetColumnIndex(5);
ImGui::Text("%u/%u", pi.imageReads, pi.imageWrites);
ImGui::TableSetColumnIndex(6);
ImGui::Text("%u/%u", pi.bufferReads, pi.bufferWrites);
ImGui::TableSetColumnIndex(7);
ImGui::Text("%u%s", pi.colorAttachmentCount, pi.hasDepth ? "+D" : "");
}
ImGui::EndTable();

View File

@@ -15,6 +15,7 @@
#include <fmt/core.h>
#include "vk_device.h"
#include <chrono>
void RenderGraph::init(EngineContext *ctx)
{
@@ -603,6 +604,25 @@ bool RenderGraph::compile()
void RenderGraph::execute(VkCommandBuffer cmd)
{
// Create/reset timestamp query pool for this execution (2 queries per pass)
if (_timestampPool != VK_NULL_HANDLE)
{
vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr);
_timestampPool = VK_NULL_HANDLE;
}
const uint32_t queryCount = static_cast<uint32_t>(_passes.size() * 2);
if (queryCount > 0)
{
VkQueryPoolCreateInfo qpci{ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
qpci.queryType = VK_QUERY_TYPE_TIMESTAMP;
qpci.queryCount = queryCount;
VK_CHECK(vkCreateQueryPool(_context->getDevice()->device(), &qpci, nullptr, &_timestampPool));
vkCmdResetQueryPool(cmd, _timestampPool, 0, queryCount);
}
_lastCpuMillis.assign(_passes.size(), -1.0f);
_wroteTimestamps.assign(_passes.size(), false);
for (size_t passIndex = 0; passIndex < _passes.size(); ++passIndex)
{
auto &p = _passes[passIndex];
@@ -626,6 +646,14 @@ void RenderGraph::execute(VkCommandBuffer cmd)
vkCmdPipelineBarrier2(cmd, &dep);
}
// Timestamp begin and CPU start after barriers
if (_timestampPool != VK_NULL_HANDLE)
{
const uint32_t qidx = static_cast<uint32_t>(passIndex * 2 + 0);
vkCmdWriteTimestamp2(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, _timestampPool, qidx);
}
auto cpuStart = std::chrono::high_resolution_clock::now();
// Begin dynamic rendering if the pass declared attachments
bool doRendering = (!p.colorAttachments.empty() || p.hasDepth);
if (doRendering)
@@ -716,6 +744,16 @@ void RenderGraph::execute(VkCommandBuffer cmd)
vkCmdEndRendering(cmd);
}
// CPU end and timestamp end
auto cpuEnd = std::chrono::high_resolution_clock::now();
_lastCpuMillis[passIndex] = std::chrono::duration<float, std::milli>(cpuEnd - cpuStart).count();
if (_timestampPool != VK_NULL_HANDLE)
{
const uint32_t qidx = static_cast<uint32_t>(passIndex * 2 + 1);
vkCmdWriteTimestamp2(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, _timestampPool, qidx);
_wroteTimestamps[passIndex] = true;
}
if (_context && _context->getDevice())
{
vkdebug::cmd_end_label(_context->getDevice()->device(), cmd);
@@ -788,6 +826,9 @@ void RenderGraph::debug_get_passes(std::vector<RGDebugPassInfo> &out) const
info.bufferWrites = static_cast<uint32_t>(p.bufferWrites.size());
info.colorAttachmentCount = static_cast<uint32_t>(p.colorAttachments.size());
info.hasDepth = p.hasDepth;
size_t idx = &p - _passes.data();
if (idx < _lastGpuMillis.size()) info.gpuMillis = _lastGpuMillis[idx];
if (idx < _lastCpuMillis.size()) info.cpuMillis = _lastCpuMillis[idx];
out.push_back(std::move(info));
}
}
@@ -894,3 +935,44 @@ RGImageHandle RenderGraph::import_swapchain_image(uint32_t index)
d.currentLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
return import_image(d);
}
void RenderGraph::resolve_timings()
{
if (_timestampPool == VK_NULL_HANDLE || _passes.empty())
{
_lastGpuMillis.assign(_passes.size(), -1.0f);
return;
}
const uint32_t queryCount = static_cast<uint32_t>(_passes.size() * 2);
std::vector<uint64_t> results(queryCount, 0);
VkResult r = vkGetQueryPoolResults(
_context->getDevice()->device(), _timestampPool,
0, queryCount,
sizeof(uint64_t) * results.size(), results.data(), sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT);
// Convert ticks to ms
VkPhysicalDeviceProperties props{};
vkGetPhysicalDeviceProperties(_context->getDevice()->physicalDevice(), &props);
const double tickNs = props.limits.timestampPeriod;
_lastGpuMillis.assign(_passes.size(), -1.0f);
for (size_t i = 0; i < _passes.size(); ++i)
{
if (!_wroteTimestamps.empty() && !_wroteTimestamps[i]) { _lastGpuMillis[i] = -1.0f; continue; }
const uint64_t t0 = results[i*2 + 0];
const uint64_t t1 = results[i*2 + 1];
if (t1 > t0)
{
double ns = double(t1 - t0) * tickNs;
_lastGpuMillis[i] = static_cast<float>(ns / 1.0e6);
}
else
{
_lastGpuMillis[i] = -1.0f;
}
}
vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr);
_timestampPool = VK_NULL_HANDLE;
}

View File

@@ -70,6 +70,9 @@ struct Pass; // fwd
uint32_t bufferWrites = 0;
uint32_t colorAttachmentCount = 0;
bool hasDepth = false;
// Last frame timings (ms); -1 when unavailable
float gpuMillis = -1.0f;
float cpuMillis = -1.0f;
};
struct RGDebugImageInfo
@@ -104,6 +107,9 @@ struct Pass; // fwd
void debug_get_images(std::vector<RGDebugImageInfo>& out) const;
void debug_get_buffers(std::vector<RGDebugBufferInfo>& out) const;
// Resolve GPU timestamps from the previous execute() call. Call after waiting on the render fence.
void resolve_timings();
private:
struct ImportedImage
{
@@ -139,4 +145,10 @@ private:
EngineContext* _context = nullptr;
RGResourceRegistry _resources;
std::vector<Pass> _passes;
// --- Timing data for last executed frame ---
VkQueryPool _timestampPool = VK_NULL_HANDLE; // holds 2 queries per pass (begin/end)
std::vector<float> _lastGpuMillis; // per pass
std::vector<float> _lastCpuMillis; // per pass (command recording time)
std::vector<bool> _wroteTimestamps; // per pass; true if queries were written in last execute
};