From dad6db971b6e8c4ec39ec9d7708d4af3919ab4e6 Mon Sep 17 00:00:00 2001 From: hydrogendeuteride Date: Tue, 4 Nov 2025 21:10:39 +0900 Subject: [PATCH] ADD: Texture load multithreaded --- src/core/asset_manager.cpp | 71 ++++++++++--- src/core/texture_cache.cpp | 213 ++++++++++++++++++++++++++++--------- src/core/texture_cache.h | 53 +++++++++ src/core/vk_engine.cpp | 106 ++++++++++++++++-- src/render/rg_graph.cpp | 16 ++- src/render/rg_graph.h | 7 +- src/scene/vk_scene.cpp | 2 +- 7 files changed, 391 insertions(+), 77 deletions(-) diff --git a/src/core/asset_manager.cpp b/src/core/asset_manager.cpp index b613f30..86b120c 100644 --- a/src/core/asset_manager.cpp +++ b/src/core/asset_manager.cpp @@ -10,6 +10,7 @@ #include #include #include "asset_locator.h" +#include using std::filesystem::path; @@ -156,33 +157,73 @@ std::shared_ptr AssetManager::createMesh(const MeshCreateInfo &info) const auto &opt = info.material.options; - auto [albedo, createdAlbedo] = loadImageFromAsset(opt.albedoPath, opt.albedoSRGB); - auto [mr, createdMR] = loadImageFromAsset(opt.metalRoughPath, opt.metalRoughSRGB); - auto [normal, createdNormal] = loadImageFromAsset(opt.normalPath, opt.normalSRGB); - - const AllocatedImage &albedoRef = createdAlbedo ? albedo : _engine->_errorCheckerboardImage; - const AllocatedImage &mrRef = createdMR ? mr : _engine->_whiteImage; - const AllocatedImage &normRef = createdNormal ? normal : _engine->_flatNormalImage; - + // Fallbacks are bound now; real textures will patch in via TextureCache AllocatedBuffer matBuffer = createMaterialBufferWithConstants(opt.constants); GLTFMetallic_Roughness::MaterialResources res{}; - res.colorImage = albedoRef; + res.colorImage = _engine->_errorCheckerboardImage; // visible fallback for albedo res.colorSampler = _engine->_samplerManager->defaultLinear(); - res.metalRoughImage = mrRef; + res.metalRoughImage = _engine->_whiteImage; res.metalRoughSampler = _engine->_samplerManager->defaultLinear(); - res.normalImage = normRef; + res.normalImage = _engine->_flatNormalImage; res.normalSampler = _engine->_samplerManager->defaultLinear(); res.dataBuffer = matBuffer.buffer; res.dataBufferOffset = 0; auto mat = createMaterial(opt.pass, res); - auto mesh = createMesh(info.name, vertsSpan, indsSpan, mat); + // Register dynamic texture bindings using the central TextureCache + if (_engine && _engine->_context && _engine->_context->textures) + { + TextureCache *cache = _engine->_context->textures; + auto buildKey = [&](std::string_view path, bool srgb) -> TextureCache::TextureKey { + TextureCache::TextureKey k{}; + if (!path.empty()) + { + k.kind = TextureCache::TextureKey::SourceKind::FilePath; + k.path = assetPath(path); + k.srgb = srgb; + k.mipmapped = true; + std::string id = std::string("PRIM:") + k.path + (srgb ? "#sRGB" : "#UNORM"); + k.hash = texcache::fnv1a64(id); + } + return k; + }; + + if (!opt.albedoPath.empty()) + { + auto key = buildKey(opt.albedoPath, opt.albedoSRGB); + if (key.hash != 0) + { + VkSampler samp = _engine->_samplerManager->defaultLinear(); + auto handle = cache->request(key, samp); + cache->watchBinding(handle, mat->data.materialSet, 1u, samp, _engine->_errorCheckerboardImage.imageView); + } + } + if (!opt.metalRoughPath.empty()) + { + auto key = buildKey(opt.metalRoughPath, opt.metalRoughSRGB); + if (key.hash != 0) + { + VkSampler samp = _engine->_samplerManager->defaultLinear(); + auto handle = cache->request(key, samp); + cache->watchBinding(handle, mat->data.materialSet, 2u, samp, _engine->_whiteImage.imageView); + } + } + if (!opt.normalPath.empty()) + { + auto key = buildKey(opt.normalPath, opt.normalSRGB); + if (key.hash != 0) + { + VkSampler samp = _engine->_samplerManager->defaultLinear(); + auto handle = cache->request(key, samp); + cache->watchBinding(handle, mat->data.materialSet, 3u, samp, _engine->_flatNormalImage.imageView); + } + } + } + + auto mesh = createMesh(info.name, vertsSpan, indsSpan, mat); _meshMaterialBuffers.emplace(info.name, matBuffer); - if (createdAlbedo) _meshOwnedImages[info.name].push_back(albedo); - if (createdMR) _meshOwnedImages[info.name].push_back(mr); - if (createdNormal) _meshOwnedImages[info.name].push_back(normal); return mesh; } diff --git a/src/core/texture_cache.cpp b/src/core/texture_cache.cpp index d89dc00..e373c23 100644 --- a/src/core/texture_cache.cpp +++ b/src/core/texture_cache.cpp @@ -8,14 +8,32 @@ #include "stb_image.h" #include #include "vk_device.h" +#include void TextureCache::init(EngineContext *ctx) { _context = ctx; + _running = true; + unsigned int threads = std::max(1u, std::min(4u, std::thread::hardware_concurrency())); + _decodeThreads.reserve(threads); + for (unsigned int i = 0; i < threads; ++i) + { + _decodeThreads.emplace_back([this]() { worker_loop(); }); + } } void TextureCache::cleanup() { + // Stop worker thread first + if (_running.exchange(false)) + { + { + std::lock_guard lk(_qMutex); + } + _qCV.notify_all(); + for (auto &t : _decodeThreads) if (t.joinable()) t.join(); + _decodeThreads.clear(); + } if (!_context || !_context->getResources()) return; auto *rm = _context->getResources(); for (auto &e : _entries) @@ -126,53 +144,8 @@ static inline size_t estimate_rgba8_bytes(uint32_t w, uint32_t h) void TextureCache::start_load(Entry &e, ResourceManager &rm) { - if (e.state == EntryState::Resident || e.state == EntryState::Loading) return; - - int width = 0, height = 0, comp = 0; - unsigned char *data = nullptr; - - if (e.key.kind == TextureKey::SourceKind::FilePath) - { - data = stbi_load(e.path.c_str(), &width, &height, &comp, 4); - } - else - { - if (!e.bytes.empty()) - { - data = stbi_load_from_memory(e.bytes.data(), static_cast(e.bytes.size()), &width, &height, &comp, 4); - } - } - - if (!data || width <= 0 || height <= 0) - { - // Failed decode; keep fallbacks bound. Mark as evicted/unloaded. - if (data) stbi_image_free(data); - e.state = EntryState::Evicted; - return; - } - - VkExtent3D extent{static_cast(width), static_cast(height), 1u}; - VkFormat fmt = e.key.srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; - - // Queue upload via ResourceManager (deferred pass if enabled) - e.image = rm.create_image(static_cast(data), extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, e.key.mipmapped); - - // Name VMA allocation for diagnostics - if (vmaDebugEnabled()) - { - std::string name = e.key.kind == TextureKey::SourceKind::FilePath ? e.path : std::string("tex.bytes"); - vmaSetAllocationName(_context->getDevice()->allocator(), e.image.allocation, name.c_str()); - } - - const float mipFactor = e.key.mipmapped ? 1.3333333f : 1.0f; // approx sum of 1/4^i - e.sizeBytes = static_cast(estimate_rgba8_bytes(extent.width, extent.height) * mipFactor); - _residentBytes += e.sizeBytes; - e.state = EntryState::Resident; - - stbi_image_free(data); - - // Patch all watched descriptors to the new image - patch_ready_entry(e); + // Legacy synchronous path retained for completeness but not used by pumpLoads now. + enqueue_decode(e); } void TextureCache::patch_ready_entry(const Entry &e) @@ -230,11 +203,14 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &) } if (recentlyUsed) { - start_load(e, rm); + enqueue_decode(e); if (++started >= kMaxLoadsPerPump) break; } } } + + // Drain decoded results and enqueue GPU uploads. + drain_ready_uploads(rm); } void TextureCache::evictToBudget(size_t budgetBytes) @@ -270,3 +246,144 @@ void TextureCache::evictToBudget(size_t budgetBytes) if (_residentBytes >= e.sizeBytes) _residentBytes -= e.sizeBytes; else _residentBytes = 0; } } + +void TextureCache::enqueue_decode(Entry &e) +{ + if (e.state != EntryState::Unloaded) return; + e.state = EntryState::Loading; + DecodeRequest rq{}; + rq.handle = static_cast(&e - _entries.data()); + rq.key = e.key; + if (e.key.kind == TextureKey::SourceKind::FilePath) rq.path = e.path; else rq.bytes = e.bytes; + { + std::lock_guard lk(_qMutex); + _queue.push_back(std::move(rq)); + } + _qCV.notify_one(); +} + +void TextureCache::worker_loop() +{ + while (_running) + { + DecodeRequest rq{}; + { + std::unique_lock lk(_qMutex); + _qCV.wait(lk, [this]{ return !_running || !_queue.empty(); }); + if (!_running) break; + rq = std::move(_queue.front()); + _queue.pop_front(); + } + + // Decode using stb_image + int w = 0, h = 0, comp = 0; + unsigned char *data = nullptr; + if (rq.key.kind == TextureKey::SourceKind::FilePath) + { + data = stbi_load(rq.path.c_str(), &w, &h, &comp, 4); + } + else + { + if (!rq.bytes.empty()) + { + data = stbi_load_from_memory(rq.bytes.data(), static_cast(rq.bytes.size()), &w, &h, &comp, 4); + } + } + + DecodedResult out{}; + out.handle = rq.handle; + out.width = w; + out.height = h; + out.mipmapped = rq.key.mipmapped; + out.srgb = rq.key.srgb; + if (data && w > 0 && h > 0) + { + size_t sz = static_cast(w) * static_cast(h) * 4u; + out.rgba.resize(sz); + memcpy(out.rgba.data(), data, sz); + } + if (data) stbi_image_free(data); + + { + std::lock_guard lk(_readyMutex); + _ready.push_back(std::move(out)); + } + } +} + +void TextureCache::drain_ready_uploads(ResourceManager &rm) +{ + std::deque local; + { + std::lock_guard lk(_readyMutex); + if (_ready.empty()) return; + local.swap(_ready); + } + + for (auto &res : local) + { + if (res.handle == InvalidHandle || res.handle >= _entries.size()) continue; + Entry &e = _entries[res.handle]; + if (res.rgba.empty() || res.width <= 0 || res.height <= 0) + { + e.state = EntryState::Evicted; // failed decode; keep fallback + continue; + } + + VkExtent3D extent{static_cast(res.width), static_cast(res.height), 1u}; + VkFormat fmt = res.srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; + e.image = rm.create_image(static_cast(res.rgba.data()), extent, fmt, + VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped); + + if (vmaDebugEnabled()) + { + std::string name = e.key.kind == TextureKey::SourceKind::FilePath ? e.path : std::string("tex.bytes"); + vmaSetAllocationName(_context->getDevice()->allocator(), e.image.allocation, name.c_str()); + } + + const float mipFactor = res.mipmapped ? 1.3333333f : 1.0f; + e.sizeBytes = static_cast(estimate_rgba8_bytes(extent.width, extent.height) * mipFactor); + _residentBytes += e.sizeBytes; + e.state = EntryState::Resident; + + // Patch descriptors now; data becomes valid before sampling due to RG upload pass + patch_ready_entry(e); + } +} + +void TextureCache::debug_snapshot(std::vector &outRows, DebugStats &outStats) const +{ + outRows.clear(); + outStats = DebugStats{}; + outStats.residentBytes = _residentBytes; + + auto stateToByteable = [&](const Entry &e) -> bool { return e.state == EntryState::Resident; }; + + for (const auto &e : _entries) + { + switch (e.state) + { + case EntryState::Resident: outStats.countResident++; break; + case EntryState::Evicted: outStats.countEvicted++; break; + case EntryState::Unloaded: outStats.countUnloaded++; break; + case EntryState::Loading: /* ignore */ break; + } + + DebugRow row{}; + if (e.key.kind == TextureKey::SourceKind::FilePath) + { + row.name = e.path.empty() ? std::string("") : e.path; + } + else + { + row.name = std::string(" (") + std::to_string(e.bytes.size()) + ")"; + } + row.bytes = e.sizeBytes; + row.lastUsed = e.lastUsedFrame; + row.state = static_cast(e.state); + outRows.push_back(std::move(row)); + } + std::sort(outRows.begin(), outRows.end(), [](const DebugRow &a, const DebugRow &b) { + return a.bytes > b.bytes; + }); +} diff --git a/src/core/texture_cache.h b/src/core/texture_cache.h index de5248e..2809a60 100644 --- a/src/core/texture_cache.h +++ b/src/core/texture_cache.h @@ -5,6 +5,11 @@ #include #include #include +#include +#include +#include +#include +#include class EngineContext; class ResourceManager; @@ -54,6 +59,24 @@ public: // Evict least-recently-used entries to fit within a budget in bytes. void evictToBudget(size_t budgetBytes); + // Debug snapshot for UI + struct DebugRow + { + std::string name; + size_t bytes{0}; + uint32_t lastUsed{0}; + uint8_t state{0}; // cast of EntryState + }; + struct DebugStats + { + size_t residentBytes{0}; + size_t countResident{0}; + size_t countEvicted{0}; + size_t countUnloaded{0}; + }; + void debug_snapshot(std::vector& outRows, DebugStats& outStats) const; + size_t resident_bytes() const { return _residentBytes; } + private: struct Patch { @@ -89,6 +112,36 @@ private: void start_load(Entry &e, ResourceManager &rm); void patch_ready_entry(const Entry &e); void patch_to_fallback(const Entry &e); + + // --- Async decode backend --- + struct DecodeRequest + { + TextureHandle handle{InvalidHandle}; + TextureKey key{}; + std::string path; + std::vector bytes; + }; + struct DecodedResult + { + TextureHandle handle{InvalidHandle}; + int width{0}; + int height{0}; + std::vector rgba; + bool mipmapped{true}; + bool srgb{false}; + }; + + void worker_loop(); + void enqueue_decode(Entry &e); + void drain_ready_uploads(ResourceManager &rm); + + std::vector _decodeThreads; + std::mutex _qMutex; + std::condition_variable _qCV; + std::deque _queue; + std::mutex _readyMutex; + std::deque _ready; + std::atomic _running{false}; }; // Helpers to build/digest keys diff --git a/src/core/vk_engine.cpp b/src/core/vk_engine.cpp index ec8b11e..2b224ea 100644 --- a/src/core/vk_engine.cpp +++ b/src/core/vk_engine.cpp @@ -50,6 +50,7 @@ #include "engine_context.h" #include "core/vk_pipeline_manager.h" #include "core/config.h" +#include "core/texture_cache.h" // Query a conservative streaming texture budget based on VMA-reported // device-local heap budgets. Uses ~35% of total device-local budget. @@ -133,6 +134,82 @@ namespace { ImGui::Text("Swap fmt: %s", string_VkFormat(eng->_swapchainManager->swapchainImageFormat())); } + // Texture streaming + budget UI + static const char* stateName(uint8_t s) + { + switch (s) + { + case 0: return "Unloaded"; + case 1: return "Loading"; + case 2: return "Resident"; + case 3: return "Evicted"; + default: return "?"; + } + } + + static void ui_textures(VulkanEngine *eng) + { + if (!eng || !eng->_textureCache) { ImGui::TextUnformatted("TextureCache not available"); return; } + DeviceManager* dev = eng->_deviceManager.get(); + VmaAllocator alloc = dev ? dev->allocator() : VK_NULL_HANDLE; + unsigned long long devLocalBudget = 0, devLocalUsage = 0; + if (alloc) + { + const VkPhysicalDeviceMemoryProperties* memProps = nullptr; + vmaGetMemoryProperties(alloc, &memProps); + VmaBudget budgets[VK_MAX_MEMORY_HEAPS] = {}; + vmaGetHeapBudgets(alloc, budgets); + if (memProps) + { + for (uint32_t i = 0; i < memProps->memoryHeapCount; ++i) + { + if (memProps->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + { + devLocalBudget += budgets[i].budget; + devLocalUsage += budgets[i].usage; + } + } + } + } + + const size_t texBudget = query_texture_budget_bytes(dev); + const size_t resBytes = eng->_textureCache->resident_bytes(); + ImGui::Text("Device local: %.1f / %.1f MiB", (double)devLocalUsage/1048576.0, (double)devLocalBudget/1048576.0); + ImGui::Text("Texture budget: %.1f MiB", (double)texBudget/1048576.0); + ImGui::Text("Resident textures: %.1f MiB", (double)resBytes/1048576.0); + ImGui::SameLine(); + if (ImGui::Button("Trim To Budget Now")) + { + eng->_textureCache->evictToBudget(texBudget); + } + + TextureCache::DebugStats stats{}; + std::vector rows; + eng->_textureCache->debug_snapshot(rows, stats); + ImGui::Text("Counts R:%zu U:%zu E:%zu", stats.countResident, stats.countUnloaded, stats.countEvicted); + + const int topN = 12; + if (ImGui::BeginTable("texrows", 4, ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingStretchProp)) + { + ImGui::TableSetupColumn("MiB", ImGuiTableColumnFlags_WidthFixed, 80); + ImGui::TableSetupColumn("State", ImGuiTableColumnFlags_WidthFixed, 90); + ImGui::TableSetupColumn("LastUsed", ImGuiTableColumnFlags_WidthFixed, 90); + ImGui::TableSetupColumn("Name"); + ImGui::TableHeadersRow(); + int count = 0; + for (const auto &r : rows) + { + if (count++ >= topN) break; + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); ImGui::Text("%.2f", (double)r.bytes/1048576.0); + ImGui::TableSetColumnIndex(1); ImGui::TextUnformatted(stateName(r.state)); + ImGui::TableSetColumnIndex(2); ImGui::Text("%u", r.lastUsed); + ImGui::TableSetColumnIndex(3); ImGui::TextUnformatted(r.name.c_str()); + } + ImGui::EndTable(); + } + } + // Shadows / Ray Query controls static void ui_shadows(VulkanEngine *eng) { @@ -495,7 +572,7 @@ void VulkanEngine::init() auto imguiPass = std::make_unique(); _renderPassManager->setImGuiPass(std::move(imguiPass)); - const std::string structurePath = _assetManager->modelPath("mirage.glb"); + const std::string structurePath = _assetManager->modelPath("Untitled.glb"); const auto structureFile = _assetManager->loadGLTF(structurePath); assert(structureFile.has_value()); @@ -625,6 +702,12 @@ void VulkanEngine::cleanup() print_vma_stats(_deviceManager.get(), "after Compute"); dump_vma_json(_deviceManager.get(), "after_Compute"); + // Ensure RenderGraph's timestamp query pool is destroyed before the device. + if (_renderGraph) + { + _renderGraph->shutdown(); + } + _swapchainManager->cleanup(); print_vma_stats(_deviceManager.get(), "after Swapchain"); dump_vma_json(_deviceManager.get(), "after_Swapchain"); @@ -910,8 +993,8 @@ void VulkanEngine::run() if (ImGui::Begin("Debug")) { const ImGuiTabBarFlags tf = ImGuiTabBarFlags_Reorderable | ImGuiTabBarFlags_AutoSelectNewTabs; - if (ImGui::BeginTabBar("DebugTabs", tf)) - { + if (ImGui::BeginTabBar("DebugTabs", tf)) + { if (ImGui::BeginTabItem("Overview")) { ui_overview(this); @@ -942,13 +1025,18 @@ void VulkanEngine::run() ui_postfx(this); ImGui::EndTabItem(); } - if (ImGui::BeginTabItem("Scene")) - { - ui_scene(this); - ImGui::EndTabItem(); - } - ImGui::EndTabBar(); + if (ImGui::BeginTabItem("Scene")) + { + ui_scene(this); + ImGui::EndTabItem(); } + if (ImGui::BeginTabItem("Textures")) + { + ui_textures(this); + ImGui::EndTabItem(); + } + ImGui::EndTabBar(); + } ImGui::End(); } ImGui::Render(); diff --git a/src/render/rg_graph.cpp b/src/render/rg_graph.cpp index fa89964..9ce9e5d 100644 --- a/src/render/rg_graph.cpp +++ b/src/render/rg_graph.cpp @@ -25,8 +25,20 @@ void RenderGraph::init(EngineContext *ctx) void RenderGraph::clear() { - _passes.clear(); - _resources.reset(); + _passes.clear(); + _resources.reset(); +} + +void RenderGraph::shutdown() +{ + // If a timestamp pool exists, ensure the GPU is not using it and destroy it. + if (_timestampPool != VK_NULL_HANDLE && _context && _context->getDevice()) + { + // Be conservative here: make sure the graphics queue is idle before destroying. + vkQueueWaitIdle(_context->getDevice()->graphicsQueue()); + vkDestroyQueryPool(_context->getDevice()->device(), _timestampPool, nullptr); + _timestampPool = VK_NULL_HANDLE; + } } RGImageHandle RenderGraph::import_image(const RGImportedImageDesc &desc) diff --git a/src/render/rg_graph.h b/src/render/rg_graph.h index 6606327..dc9e5ea 100644 --- a/src/render/rg_graph.h +++ b/src/render/rg_graph.h @@ -15,8 +15,11 @@ class EngineContext; class RenderGraph { public: - void init(EngineContext* ctx); - void clear(); + void init(EngineContext* ctx); + void clear(); + // Destroy any GPU-side state owned by the graph (e.g. query pools). + // Call during engine shutdown before destroying the VkDevice. + void shutdown(); // Import externally owned images (swapchain, drawImage, g-buffers) RGImageHandle import_image(const RGImportedImageDesc& desc); diff --git a/src/scene/vk_scene.cpp b/src/scene/vk_scene.cpp index 3265cad..1677a4a 100644 --- a/src/scene/vk_scene.cpp +++ b/src/scene/vk_scene.cpp @@ -23,7 +23,7 @@ void SceneManager::init(EngineContext *context) mainCamera.yaw = 0; sceneData.ambientColor = glm::vec4(0.1f, 0.1f, 0.1f, 1.0f); - sceneData.sunlightDirection = glm::vec4(-1.0f, -1.0f, -1.0f, 1.0f); + sceneData.sunlightDirection = glm::vec4(-1.0f, -1.0f, -0.1f, 1.0f); sceneData.sunlightColor = glm::vec4(1.0f, 1.0f, 1.0f, 3.0f); }