ADD: Clipmap shadow tuning
This commit is contained in:
@@ -8,7 +8,6 @@ layout(location=0) out vec4 outColor;
|
||||
layout(set=1, binding=0) uniform sampler2D posTex;
|
||||
layout(set=1, binding=1) uniform sampler2D normalTex;
|
||||
layout(set=1, binding=2) uniform sampler2D albedoTex;
|
||||
// Mixed near + CSM: shadowTex[0] is the near/simple map, 1..N-1 are cascades
|
||||
layout(set=2, binding=0) uniform sampler2D shadowTex[4];
|
||||
|
||||
const float PI = 3.14159265359;
|
||||
@@ -42,7 +41,7 @@ uint selectCascadeIndex(vec3 worldPos)
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return 3u; // fallback to farthest level
|
||||
return 3u;
|
||||
}
|
||||
|
||||
float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L)
|
||||
@@ -71,7 +70,7 @@ float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L)
|
||||
vec2 texelSize = 1.0 / vec2(dim);
|
||||
|
||||
float baseRadius = 1.25;
|
||||
// Slightly increase filter for farther cascades
|
||||
|
||||
float radius = mix(baseRadius, baseRadius * 3.0, float(ci) / 3.0);
|
||||
|
||||
float ang = hash12(suv * 4096.0) * 6.2831853;
|
||||
|
||||
@@ -10,14 +10,9 @@ inline constexpr bool kUseValidationLayers = true;
|
||||
// Shadow mapping configuration
|
||||
inline constexpr int kShadowCascadeCount = 4;
|
||||
// Maximum shadow distance for CSM in view-space units
|
||||
inline constexpr float kShadowCSMFar = 400.0f;
|
||||
inline constexpr float kShadowCSMFar = 800.0f;
|
||||
// Shadow map resolution used for stabilization (texel snapping). Must match actual image size.
|
||||
inline constexpr float kShadowMapResolution = 2048.0f;
|
||||
// Extra XY expansion for cascade footprint (safety against FOV/aspect changes)
|
||||
inline constexpr float kShadowCascadeRadiusScale = 2.5f;
|
||||
// Additive XY margin in world units (light-space) beyond scaled radius
|
||||
inline constexpr float kShadowCascadeRadiusMargin = 40.0f;
|
||||
|
||||
// Clipmap shadow configuration (used when cascades operate in clipmap mode)
|
||||
// Base coverage radius of level 0 around the camera (world units). Each level doubles the radius.
|
||||
inline constexpr float kShadowClipBaseRadius = 20.0f;
|
||||
|
||||
@@ -128,7 +128,7 @@ void VulkanEngine::init()
|
||||
auto imguiPass = std::make_unique<ImGuiPass>();
|
||||
_renderPassManager->setImGuiPass(std::move(imguiPass));
|
||||
|
||||
const std::string structurePath = _assetManager->modelPath("police_office.glb");
|
||||
const std::string structurePath = _assetManager->modelPath("resi.glb");
|
||||
const auto structureFile = _assetManager->loadGLTF(structurePath);
|
||||
|
||||
assert(structureFile.has_value());
|
||||
@@ -263,6 +263,11 @@ void VulkanEngine::draw()
|
||||
VK_CHECK(vkWaitForFences(_deviceManager->device(), 1, &get_current_frame()._renderFence, true, 1000000000));
|
||||
|
||||
get_current_frame()._deletionQueue.flush();
|
||||
// Resolve last frame's pass timings before we clear and rebuild the graph
|
||||
if (_renderGraph)
|
||||
{
|
||||
_renderGraph->resolve_timings();
|
||||
}
|
||||
get_current_frame()._frameDescriptors.clear_pools(_deviceManager->device());
|
||||
//< frame_clear
|
||||
|
||||
@@ -515,13 +520,15 @@ void VulkanEngine::run()
|
||||
ImGui::SameLine();
|
||||
ImGui::Text("%zu passes", passInfos.size());
|
||||
|
||||
if (ImGui::BeginTable("passes", 6, ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingStretchProp))
|
||||
if (ImGui::BeginTable("passes", 8, ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingStretchProp))
|
||||
{
|
||||
ImGui::TableSetupColumn("Enable", ImGuiTableColumnFlags_WidthFixed, 70);
|
||||
ImGui::TableSetupColumn("Name");
|
||||
ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, 90);
|
||||
ImGui::TableSetupColumn("Imgs", ImGuiTableColumnFlags_WidthFixed, 60);
|
||||
ImGui::TableSetupColumn("Bufs", ImGuiTableColumnFlags_WidthFixed, 60);
|
||||
ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, 80);
|
||||
ImGui::TableSetupColumn("GPU ms", ImGuiTableColumnFlags_WidthFixed, 70);
|
||||
ImGui::TableSetupColumn("CPU rec ms", ImGuiTableColumnFlags_WidthFixed, 90);
|
||||
ImGui::TableSetupColumn("Imgs", ImGuiTableColumnFlags_WidthFixed, 55);
|
||||
ImGui::TableSetupColumn("Bufs", ImGuiTableColumnFlags_WidthFixed, 55);
|
||||
ImGui::TableSetupColumn("Attachments", ImGuiTableColumnFlags_WidthFixed, 100);
|
||||
ImGui::TableHeadersRow();
|
||||
|
||||
@@ -551,10 +558,14 @@ void VulkanEngine::run()
|
||||
ImGui::TableSetColumnIndex(2);
|
||||
ImGui::TextUnformatted(typeName(pi.type));
|
||||
ImGui::TableSetColumnIndex(3);
|
||||
ImGui::Text("%u/%u", pi.imageReads, pi.imageWrites);
|
||||
if (pi.gpuMillis >= 0.0f) ImGui::Text("%.2f", pi.gpuMillis); else ImGui::TextUnformatted("-");
|
||||
ImGui::TableSetColumnIndex(4);
|
||||
ImGui::Text("%u/%u", pi.bufferReads, pi.bufferWrites);
|
||||
if (pi.cpuMillis >= 0.0f) ImGui::Text("%.2f", pi.cpuMillis); else ImGui::TextUnformatted("-");
|
||||
ImGui::TableSetColumnIndex(5);
|
||||
ImGui::Text("%u/%u", pi.imageReads, pi.imageWrites);
|
||||
ImGui::TableSetColumnIndex(6);
|
||||
ImGui::Text("%u/%u", pi.bufferReads, pi.bufferWrites);
|
||||
ImGui::TableSetColumnIndex(7);
|
||||
ImGui::Text("%u%s", pi.colorAttachmentCount, pi.hasDepth ? "+D" : "");
|
||||
}
|
||||
ImGui::EndTable();
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <fmt/core.h>
|
||||
|
||||
#include "vk_device.h"
|
||||
#include <chrono>
|
||||
|
||||
void RenderGraph::init(EngineContext *ctx)
|
||||
{
|
||||
@@ -603,6 +604,25 @@ bool RenderGraph::compile()
|
||||
|
||||
void RenderGraph::execute(VkCommandBuffer cmd)
|
||||
{
|
||||
// Create/reset timestamp query pool for this execution (2 queries per pass)
|
||||
if (_timestampPool != VK_NULL_HANDLE)
|
||||
{
|
||||
vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr);
|
||||
_timestampPool = VK_NULL_HANDLE;
|
||||
}
|
||||
const uint32_t queryCount = static_cast<uint32_t>(_passes.size() * 2);
|
||||
if (queryCount > 0)
|
||||
{
|
||||
VkQueryPoolCreateInfo qpci{ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
|
||||
qpci.queryType = VK_QUERY_TYPE_TIMESTAMP;
|
||||
qpci.queryCount = queryCount;
|
||||
VK_CHECK(vkCreateQueryPool(_context->getDevice()->device(), &qpci, nullptr, &_timestampPool));
|
||||
vkCmdResetQueryPool(cmd, _timestampPool, 0, queryCount);
|
||||
}
|
||||
|
||||
_lastCpuMillis.assign(_passes.size(), -1.0f);
|
||||
_wroteTimestamps.assign(_passes.size(), false);
|
||||
|
||||
for (size_t passIndex = 0; passIndex < _passes.size(); ++passIndex)
|
||||
{
|
||||
auto &p = _passes[passIndex];
|
||||
@@ -626,6 +646,14 @@ void RenderGraph::execute(VkCommandBuffer cmd)
|
||||
vkCmdPipelineBarrier2(cmd, &dep);
|
||||
}
|
||||
|
||||
// Timestamp begin and CPU start after barriers
|
||||
if (_timestampPool != VK_NULL_HANDLE)
|
||||
{
|
||||
const uint32_t qidx = static_cast<uint32_t>(passIndex * 2 + 0);
|
||||
vkCmdWriteTimestamp2(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, _timestampPool, qidx);
|
||||
}
|
||||
auto cpuStart = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Begin dynamic rendering if the pass declared attachments
|
||||
bool doRendering = (!p.colorAttachments.empty() || p.hasDepth);
|
||||
if (doRendering)
|
||||
@@ -716,6 +744,16 @@ void RenderGraph::execute(VkCommandBuffer cmd)
|
||||
vkCmdEndRendering(cmd);
|
||||
}
|
||||
|
||||
// CPU end and timestamp end
|
||||
auto cpuEnd = std::chrono::high_resolution_clock::now();
|
||||
_lastCpuMillis[passIndex] = std::chrono::duration<float, std::milli>(cpuEnd - cpuStart).count();
|
||||
if (_timestampPool != VK_NULL_HANDLE)
|
||||
{
|
||||
const uint32_t qidx = static_cast<uint32_t>(passIndex * 2 + 1);
|
||||
vkCmdWriteTimestamp2(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, _timestampPool, qidx);
|
||||
_wroteTimestamps[passIndex] = true;
|
||||
}
|
||||
|
||||
if (_context && _context->getDevice())
|
||||
{
|
||||
vkdebug::cmd_end_label(_context->getDevice()->device(), cmd);
|
||||
@@ -788,6 +826,9 @@ void RenderGraph::debug_get_passes(std::vector<RGDebugPassInfo> &out) const
|
||||
info.bufferWrites = static_cast<uint32_t>(p.bufferWrites.size());
|
||||
info.colorAttachmentCount = static_cast<uint32_t>(p.colorAttachments.size());
|
||||
info.hasDepth = p.hasDepth;
|
||||
size_t idx = &p - _passes.data();
|
||||
if (idx < _lastGpuMillis.size()) info.gpuMillis = _lastGpuMillis[idx];
|
||||
if (idx < _lastCpuMillis.size()) info.cpuMillis = _lastCpuMillis[idx];
|
||||
out.push_back(std::move(info));
|
||||
}
|
||||
}
|
||||
@@ -894,3 +935,44 @@ RGImageHandle RenderGraph::import_swapchain_image(uint32_t index)
|
||||
d.currentLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
return import_image(d);
|
||||
}
|
||||
|
||||
void RenderGraph::resolve_timings()
|
||||
{
|
||||
if (_timestampPool == VK_NULL_HANDLE || _passes.empty())
|
||||
{
|
||||
_lastGpuMillis.assign(_passes.size(), -1.0f);
|
||||
return;
|
||||
}
|
||||
|
||||
const uint32_t queryCount = static_cast<uint32_t>(_passes.size() * 2);
|
||||
std::vector<uint64_t> results(queryCount, 0);
|
||||
VkResult r = vkGetQueryPoolResults(
|
||||
_context->getDevice()->device(), _timestampPool,
|
||||
0, queryCount,
|
||||
sizeof(uint64_t) * results.size(), results.data(), sizeof(uint64_t),
|
||||
VK_QUERY_RESULT_64_BIT);
|
||||
// Convert ticks to ms
|
||||
VkPhysicalDeviceProperties props{};
|
||||
vkGetPhysicalDeviceProperties(_context->getDevice()->physicalDevice(), &props);
|
||||
const double tickNs = props.limits.timestampPeriod;
|
||||
|
||||
_lastGpuMillis.assign(_passes.size(), -1.0f);
|
||||
for (size_t i = 0; i < _passes.size(); ++i)
|
||||
{
|
||||
if (!_wroteTimestamps.empty() && !_wroteTimestamps[i]) { _lastGpuMillis[i] = -1.0f; continue; }
|
||||
const uint64_t t0 = results[i*2 + 0];
|
||||
const uint64_t t1 = results[i*2 + 1];
|
||||
if (t1 > t0)
|
||||
{
|
||||
double ns = double(t1 - t0) * tickNs;
|
||||
_lastGpuMillis[i] = static_cast<float>(ns / 1.0e6);
|
||||
}
|
||||
else
|
||||
{
|
||||
_lastGpuMillis[i] = -1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr);
|
||||
_timestampPool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
@@ -70,6 +70,9 @@ struct Pass; // fwd
|
||||
uint32_t bufferWrites = 0;
|
||||
uint32_t colorAttachmentCount = 0;
|
||||
bool hasDepth = false;
|
||||
// Last frame timings (ms); -1 when unavailable
|
||||
float gpuMillis = -1.0f;
|
||||
float cpuMillis = -1.0f;
|
||||
};
|
||||
|
||||
struct RGDebugImageInfo
|
||||
@@ -104,6 +107,9 @@ struct Pass; // fwd
|
||||
void debug_get_images(std::vector<RGDebugImageInfo>& out) const;
|
||||
void debug_get_buffers(std::vector<RGDebugBufferInfo>& out) const;
|
||||
|
||||
// Resolve GPU timestamps from the previous execute() call. Call after waiting on the render fence.
|
||||
void resolve_timings();
|
||||
|
||||
private:
|
||||
struct ImportedImage
|
||||
{
|
||||
@@ -139,4 +145,10 @@ private:
|
||||
EngineContext* _context = nullptr;
|
||||
RGResourceRegistry _resources;
|
||||
std::vector<Pass> _passes;
|
||||
|
||||
// --- Timing data for last executed frame ---
|
||||
VkQueryPool _timestampPool = VK_NULL_HANDLE; // holds 2 queries per pass (begin/end)
|
||||
std::vector<float> _lastGpuMillis; // per pass
|
||||
std::vector<float> _lastCpuMillis; // per pass (command recording time)
|
||||
std::vector<bool> _wroteTimestamps; // per pass; true if queries were written in last execute
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user