diff --git a/shaders/deferred_lighting.frag b/shaders/deferred_lighting.frag index 03bfe94..0060746 100644 --- a/shaders/deferred_lighting.frag +++ b/shaders/deferred_lighting.frag @@ -8,7 +8,6 @@ layout(location=0) out vec4 outColor; layout(set=1, binding=0) uniform sampler2D posTex; layout(set=1, binding=1) uniform sampler2D normalTex; layout(set=1, binding=2) uniform sampler2D albedoTex; -// Mixed near + CSM: shadowTex[0] is the near/simple map, 1..N-1 are cascades layout(set=2, binding=0) uniform sampler2D shadowTex[4]; const float PI = 3.14159265359; @@ -42,7 +41,7 @@ uint selectCascadeIndex(vec3 worldPos) return i; } } - return 3u; // fallback to farthest level + return 3u; } float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L) @@ -71,7 +70,7 @@ float calcShadowVisibility(vec3 worldPos, vec3 N, vec3 L) vec2 texelSize = 1.0 / vec2(dim); float baseRadius = 1.25; - // Slightly increase filter for farther cascades + float radius = mix(baseRadius, baseRadius * 3.0, float(ci) / 3.0); float ang = hash12(suv * 4096.0) * 6.2831853; diff --git a/src/core/config.h b/src/core/config.h index 2542387..6aab173 100644 --- a/src/core/config.h +++ b/src/core/config.h @@ -10,14 +10,9 @@ inline constexpr bool kUseValidationLayers = true; // Shadow mapping configuration inline constexpr int kShadowCascadeCount = 4; // Maximum shadow distance for CSM in view-space units -inline constexpr float kShadowCSMFar = 400.0f; +inline constexpr float kShadowCSMFar = 800.0f; // Shadow map resolution used for stabilization (texel snapping). Must match actual image size. inline constexpr float kShadowMapResolution = 2048.0f; -// Extra XY expansion for cascade footprint (safety against FOV/aspect changes) -inline constexpr float kShadowCascadeRadiusScale = 2.5f; -// Additive XY margin in world units (light-space) beyond scaled radius -inline constexpr float kShadowCascadeRadiusMargin = 40.0f; - // Clipmap shadow configuration (used when cascades operate in clipmap mode) // Base coverage radius of level 0 around the camera (world units). Each level doubles the radius. inline constexpr float kShadowClipBaseRadius = 20.0f; diff --git a/src/core/vk_engine.cpp b/src/core/vk_engine.cpp index 3ee5630..4d50369 100644 --- a/src/core/vk_engine.cpp +++ b/src/core/vk_engine.cpp @@ -128,7 +128,7 @@ void VulkanEngine::init() auto imguiPass = std::make_unique(); _renderPassManager->setImGuiPass(std::move(imguiPass)); - const std::string structurePath = _assetManager->modelPath("police_office.glb"); + const std::string structurePath = _assetManager->modelPath("resi.glb"); const auto structureFile = _assetManager->loadGLTF(structurePath); assert(structureFile.has_value()); @@ -263,6 +263,11 @@ void VulkanEngine::draw() VK_CHECK(vkWaitForFences(_deviceManager->device(), 1, &get_current_frame()._renderFence, true, 1000000000)); get_current_frame()._deletionQueue.flush(); + // Resolve last frame's pass timings before we clear and rebuild the graph + if (_renderGraph) + { + _renderGraph->resolve_timings(); + } get_current_frame()._frameDescriptors.clear_pools(_deviceManager->device()); //< frame_clear @@ -515,13 +520,15 @@ void VulkanEngine::run() ImGui::SameLine(); ImGui::Text("%zu passes", passInfos.size()); - if (ImGui::BeginTable("passes", 6, ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingStretchProp)) + if (ImGui::BeginTable("passes", 8, ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingStretchProp)) { ImGui::TableSetupColumn("Enable", ImGuiTableColumnFlags_WidthFixed, 70); ImGui::TableSetupColumn("Name"); - ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, 90); - ImGui::TableSetupColumn("Imgs", ImGuiTableColumnFlags_WidthFixed, 60); - ImGui::TableSetupColumn("Bufs", ImGuiTableColumnFlags_WidthFixed, 60); + ImGui::TableSetupColumn("Type", ImGuiTableColumnFlags_WidthFixed, 80); + ImGui::TableSetupColumn("GPU ms", ImGuiTableColumnFlags_WidthFixed, 70); + ImGui::TableSetupColumn("CPU rec ms", ImGuiTableColumnFlags_WidthFixed, 90); + ImGui::TableSetupColumn("Imgs", ImGuiTableColumnFlags_WidthFixed, 55); + ImGui::TableSetupColumn("Bufs", ImGuiTableColumnFlags_WidthFixed, 55); ImGui::TableSetupColumn("Attachments", ImGuiTableColumnFlags_WidthFixed, 100); ImGui::TableHeadersRow(); @@ -551,10 +558,14 @@ void VulkanEngine::run() ImGui::TableSetColumnIndex(2); ImGui::TextUnformatted(typeName(pi.type)); ImGui::TableSetColumnIndex(3); - ImGui::Text("%u/%u", pi.imageReads, pi.imageWrites); + if (pi.gpuMillis >= 0.0f) ImGui::Text("%.2f", pi.gpuMillis); else ImGui::TextUnformatted("-"); ImGui::TableSetColumnIndex(4); - ImGui::Text("%u/%u", pi.bufferReads, pi.bufferWrites); + if (pi.cpuMillis >= 0.0f) ImGui::Text("%.2f", pi.cpuMillis); else ImGui::TextUnformatted("-"); ImGui::TableSetColumnIndex(5); + ImGui::Text("%u/%u", pi.imageReads, pi.imageWrites); + ImGui::TableSetColumnIndex(6); + ImGui::Text("%u/%u", pi.bufferReads, pi.bufferWrites); + ImGui::TableSetColumnIndex(7); ImGui::Text("%u%s", pi.colorAttachmentCount, pi.hasDepth ? "+D" : ""); } ImGui::EndTable(); diff --git a/src/render/rg_graph.cpp b/src/render/rg_graph.cpp index 0338e35..061d3d9 100644 --- a/src/render/rg_graph.cpp +++ b/src/render/rg_graph.cpp @@ -15,6 +15,7 @@ #include #include "vk_device.h" +#include void RenderGraph::init(EngineContext *ctx) { @@ -603,6 +604,25 @@ bool RenderGraph::compile() void RenderGraph::execute(VkCommandBuffer cmd) { + // Create/reset timestamp query pool for this execution (2 queries per pass) + if (_timestampPool != VK_NULL_HANDLE) + { + vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr); + _timestampPool = VK_NULL_HANDLE; + } + const uint32_t queryCount = static_cast(_passes.size() * 2); + if (queryCount > 0) + { + VkQueryPoolCreateInfo qpci{ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; + qpci.queryType = VK_QUERY_TYPE_TIMESTAMP; + qpci.queryCount = queryCount; + VK_CHECK(vkCreateQueryPool(_context->getDevice()->device(), &qpci, nullptr, &_timestampPool)); + vkCmdResetQueryPool(cmd, _timestampPool, 0, queryCount); + } + + _lastCpuMillis.assign(_passes.size(), -1.0f); + _wroteTimestamps.assign(_passes.size(), false); + for (size_t passIndex = 0; passIndex < _passes.size(); ++passIndex) { auto &p = _passes[passIndex]; @@ -626,6 +646,14 @@ void RenderGraph::execute(VkCommandBuffer cmd) vkCmdPipelineBarrier2(cmd, &dep); } + // Timestamp begin and CPU start after barriers + if (_timestampPool != VK_NULL_HANDLE) + { + const uint32_t qidx = static_cast(passIndex * 2 + 0); + vkCmdWriteTimestamp2(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, _timestampPool, qidx); + } + auto cpuStart = std::chrono::high_resolution_clock::now(); + // Begin dynamic rendering if the pass declared attachments bool doRendering = (!p.colorAttachments.empty() || p.hasDepth); if (doRendering) @@ -716,6 +744,16 @@ void RenderGraph::execute(VkCommandBuffer cmd) vkCmdEndRendering(cmd); } + // CPU end and timestamp end + auto cpuEnd = std::chrono::high_resolution_clock::now(); + _lastCpuMillis[passIndex] = std::chrono::duration(cpuEnd - cpuStart).count(); + if (_timestampPool != VK_NULL_HANDLE) + { + const uint32_t qidx = static_cast(passIndex * 2 + 1); + vkCmdWriteTimestamp2(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, _timestampPool, qidx); + _wroteTimestamps[passIndex] = true; + } + if (_context && _context->getDevice()) { vkdebug::cmd_end_label(_context->getDevice()->device(), cmd); @@ -788,6 +826,9 @@ void RenderGraph::debug_get_passes(std::vector &out) const info.bufferWrites = static_cast(p.bufferWrites.size()); info.colorAttachmentCount = static_cast(p.colorAttachments.size()); info.hasDepth = p.hasDepth; + size_t idx = &p - _passes.data(); + if (idx < _lastGpuMillis.size()) info.gpuMillis = _lastGpuMillis[idx]; + if (idx < _lastCpuMillis.size()) info.cpuMillis = _lastCpuMillis[idx]; out.push_back(std::move(info)); } } @@ -894,3 +935,44 @@ RGImageHandle RenderGraph::import_swapchain_image(uint32_t index) d.currentLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; return import_image(d); } + +void RenderGraph::resolve_timings() +{ + if (_timestampPool == VK_NULL_HANDLE || _passes.empty()) + { + _lastGpuMillis.assign(_passes.size(), -1.0f); + return; + } + + const uint32_t queryCount = static_cast(_passes.size() * 2); + std::vector results(queryCount, 0); + VkResult r = vkGetQueryPoolResults( + _context->getDevice()->device(), _timestampPool, + 0, queryCount, + sizeof(uint64_t) * results.size(), results.data(), sizeof(uint64_t), + VK_QUERY_RESULT_64_BIT); + // Convert ticks to ms + VkPhysicalDeviceProperties props{}; + vkGetPhysicalDeviceProperties(_context->getDevice()->physicalDevice(), &props); + const double tickNs = props.limits.timestampPeriod; + + _lastGpuMillis.assign(_passes.size(), -1.0f); + for (size_t i = 0; i < _passes.size(); ++i) + { + if (!_wroteTimestamps.empty() && !_wroteTimestamps[i]) { _lastGpuMillis[i] = -1.0f; continue; } + const uint64_t t0 = results[i*2 + 0]; + const uint64_t t1 = results[i*2 + 1]; + if (t1 > t0) + { + double ns = double(t1 - t0) * tickNs; + _lastGpuMillis[i] = static_cast(ns / 1.0e6); + } + else + { + _lastGpuMillis[i] = -1.0f; + } + } + + vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr); + _timestampPool = VK_NULL_HANDLE; +} diff --git a/src/render/rg_graph.h b/src/render/rg_graph.h index 85359f9..6606327 100644 --- a/src/render/rg_graph.h +++ b/src/render/rg_graph.h @@ -70,6 +70,9 @@ struct Pass; // fwd uint32_t bufferWrites = 0; uint32_t colorAttachmentCount = 0; bool hasDepth = false; + // Last frame timings (ms); -1 when unavailable + float gpuMillis = -1.0f; + float cpuMillis = -1.0f; }; struct RGDebugImageInfo @@ -104,6 +107,9 @@ struct Pass; // fwd void debug_get_images(std::vector& out) const; void debug_get_buffers(std::vector& out) const; + // Resolve GPU timestamps from the previous execute() call. Call after waiting on the render fence. + void resolve_timings(); + private: struct ImportedImage { @@ -137,6 +143,12 @@ private: }; EngineContext* _context = nullptr; - RGResourceRegistry _resources; - std::vector _passes; + RGResourceRegistry _resources; + std::vector _passes; + + // --- Timing data for last executed frame --- + VkQueryPool _timestampPool = VK_NULL_HANDLE; // holds 2 queries per pass (begin/end) + std::vector _lastGpuMillis; // per pass + std::vector _lastCpuMillis; // per pass (command recording time) + std::vector _wroteTimestamps; // per pass; true if queries were written in last execute };