ADD: Clipmap shadow tuning
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
#include <fmt/core.h>
|
||||
|
||||
#include "vk_device.h"
|
||||
#include <chrono>
|
||||
|
||||
void RenderGraph::init(EngineContext *ctx)
|
||||
{
|
||||
@@ -603,6 +604,25 @@ bool RenderGraph::compile()
|
||||
|
||||
void RenderGraph::execute(VkCommandBuffer cmd)
|
||||
{
|
||||
// Create/reset timestamp query pool for this execution (2 queries per pass)
|
||||
if (_timestampPool != VK_NULL_HANDLE)
|
||||
{
|
||||
vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr);
|
||||
_timestampPool = VK_NULL_HANDLE;
|
||||
}
|
||||
const uint32_t queryCount = static_cast<uint32_t>(_passes.size() * 2);
|
||||
if (queryCount > 0)
|
||||
{
|
||||
VkQueryPoolCreateInfo qpci{ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
|
||||
qpci.queryType = VK_QUERY_TYPE_TIMESTAMP;
|
||||
qpci.queryCount = queryCount;
|
||||
VK_CHECK(vkCreateQueryPool(_context->getDevice()->device(), &qpci, nullptr, &_timestampPool));
|
||||
vkCmdResetQueryPool(cmd, _timestampPool, 0, queryCount);
|
||||
}
|
||||
|
||||
_lastCpuMillis.assign(_passes.size(), -1.0f);
|
||||
_wroteTimestamps.assign(_passes.size(), false);
|
||||
|
||||
for (size_t passIndex = 0; passIndex < _passes.size(); ++passIndex)
|
||||
{
|
||||
auto &p = _passes[passIndex];
|
||||
@@ -626,6 +646,14 @@ void RenderGraph::execute(VkCommandBuffer cmd)
|
||||
vkCmdPipelineBarrier2(cmd, &dep);
|
||||
}
|
||||
|
||||
// Timestamp begin and CPU start after barriers
|
||||
if (_timestampPool != VK_NULL_HANDLE)
|
||||
{
|
||||
const uint32_t qidx = static_cast<uint32_t>(passIndex * 2 + 0);
|
||||
vkCmdWriteTimestamp2(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, _timestampPool, qidx);
|
||||
}
|
||||
auto cpuStart = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Begin dynamic rendering if the pass declared attachments
|
||||
bool doRendering = (!p.colorAttachments.empty() || p.hasDepth);
|
||||
if (doRendering)
|
||||
@@ -716,6 +744,16 @@ void RenderGraph::execute(VkCommandBuffer cmd)
|
||||
vkCmdEndRendering(cmd);
|
||||
}
|
||||
|
||||
// CPU end and timestamp end
|
||||
auto cpuEnd = std::chrono::high_resolution_clock::now();
|
||||
_lastCpuMillis[passIndex] = std::chrono::duration<float, std::milli>(cpuEnd - cpuStart).count();
|
||||
if (_timestampPool != VK_NULL_HANDLE)
|
||||
{
|
||||
const uint32_t qidx = static_cast<uint32_t>(passIndex * 2 + 1);
|
||||
vkCmdWriteTimestamp2(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, _timestampPool, qidx);
|
||||
_wroteTimestamps[passIndex] = true;
|
||||
}
|
||||
|
||||
if (_context && _context->getDevice())
|
||||
{
|
||||
vkdebug::cmd_end_label(_context->getDevice()->device(), cmd);
|
||||
@@ -788,6 +826,9 @@ void RenderGraph::debug_get_passes(std::vector<RGDebugPassInfo> &out) const
|
||||
info.bufferWrites = static_cast<uint32_t>(p.bufferWrites.size());
|
||||
info.colorAttachmentCount = static_cast<uint32_t>(p.colorAttachments.size());
|
||||
info.hasDepth = p.hasDepth;
|
||||
size_t idx = &p - _passes.data();
|
||||
if (idx < _lastGpuMillis.size()) info.gpuMillis = _lastGpuMillis[idx];
|
||||
if (idx < _lastCpuMillis.size()) info.cpuMillis = _lastCpuMillis[idx];
|
||||
out.push_back(std::move(info));
|
||||
}
|
||||
}
|
||||
@@ -894,3 +935,44 @@ RGImageHandle RenderGraph::import_swapchain_image(uint32_t index)
|
||||
d.currentLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
return import_image(d);
|
||||
}
|
||||
|
||||
void RenderGraph::resolve_timings()
|
||||
{
|
||||
if (_timestampPool == VK_NULL_HANDLE || _passes.empty())
|
||||
{
|
||||
_lastGpuMillis.assign(_passes.size(), -1.0f);
|
||||
return;
|
||||
}
|
||||
|
||||
const uint32_t queryCount = static_cast<uint32_t>(_passes.size() * 2);
|
||||
std::vector<uint64_t> results(queryCount, 0);
|
||||
VkResult r = vkGetQueryPoolResults(
|
||||
_context->getDevice()->device(), _timestampPool,
|
||||
0, queryCount,
|
||||
sizeof(uint64_t) * results.size(), results.data(), sizeof(uint64_t),
|
||||
VK_QUERY_RESULT_64_BIT);
|
||||
// Convert ticks to ms
|
||||
VkPhysicalDeviceProperties props{};
|
||||
vkGetPhysicalDeviceProperties(_context->getDevice()->physicalDevice(), &props);
|
||||
const double tickNs = props.limits.timestampPeriod;
|
||||
|
||||
_lastGpuMillis.assign(_passes.size(), -1.0f);
|
||||
for (size_t i = 0; i < _passes.size(); ++i)
|
||||
{
|
||||
if (!_wroteTimestamps.empty() && !_wroteTimestamps[i]) { _lastGpuMillis[i] = -1.0f; continue; }
|
||||
const uint64_t t0 = results[i*2 + 0];
|
||||
const uint64_t t1 = results[i*2 + 1];
|
||||
if (t1 > t0)
|
||||
{
|
||||
double ns = double(t1 - t0) * tickNs;
|
||||
_lastGpuMillis[i] = static_cast<float>(ns / 1.0e6);
|
||||
}
|
||||
else
|
||||
{
|
||||
_lastGpuMillis[i] = -1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr);
|
||||
_timestampPool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
@@ -70,6 +70,9 @@ struct Pass; // fwd
|
||||
uint32_t bufferWrites = 0;
|
||||
uint32_t colorAttachmentCount = 0;
|
||||
bool hasDepth = false;
|
||||
// Last frame timings (ms); -1 when unavailable
|
||||
float gpuMillis = -1.0f;
|
||||
float cpuMillis = -1.0f;
|
||||
};
|
||||
|
||||
struct RGDebugImageInfo
|
||||
@@ -104,6 +107,9 @@ struct Pass; // fwd
|
||||
void debug_get_images(std::vector<RGDebugImageInfo>& out) const;
|
||||
void debug_get_buffers(std::vector<RGDebugBufferInfo>& out) const;
|
||||
|
||||
// Resolve GPU timestamps from the previous execute() call. Call after waiting on the render fence.
|
||||
void resolve_timings();
|
||||
|
||||
private:
|
||||
struct ImportedImage
|
||||
{
|
||||
@@ -137,6 +143,12 @@ private:
|
||||
};
|
||||
|
||||
EngineContext* _context = nullptr;
|
||||
RGResourceRegistry _resources;
|
||||
std::vector<Pass> _passes;
|
||||
RGResourceRegistry _resources;
|
||||
std::vector<Pass> _passes;
|
||||
|
||||
// --- Timing data for last executed frame ---
|
||||
VkQueryPool _timestampPool = VK_NULL_HANDLE; // holds 2 queries per pass (begin/end)
|
||||
std::vector<float> _lastGpuMillis; // per pass
|
||||
std::vector<float> _lastCpuMillis; // per pass (command recording time)
|
||||
std::vector<bool> _wroteTimestamps; // per pass; true if queries were written in last execute
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user