diff --git a/src/core/texture_cache.cpp b/src/core/texture_cache.cpp index d08a678..da013fc 100644 --- a/src/core/texture_cache.cpp +++ b/src/core/texture_cache.cpp @@ -9,6 +9,7 @@ #include #include "vk_device.h" #include +#include void TextureCache::init(EngineContext *ctx) { @@ -118,6 +119,24 @@ void TextureCache::watchBinding(TextureHandle handle, VkDescriptorSet set, uint3 _setToHandles[set].push_back(handle); } +void TextureCache::unwatchSet(VkDescriptorSet set) +{ + if (set == VK_NULL_HANDLE) return; + auto it = _setToHandles.find(set); + if (it == _setToHandles.end()) return; + + const auto &handles = it->second; + for (TextureHandle h : handles) + { + if (h >= _entries.size()) continue; + auto &patches = _entries[h].patches; + patches.erase(std::remove_if(patches.begin(), patches.end(), + [&](const Patch &p){ return p.set == set; }), + patches.end()); + } + _setToHandles.erase(it); +} + void TextureCache::markUsed(TextureHandle handle, uint32_t frameIndex) { if (handle == InvalidHandle) return; @@ -190,7 +209,8 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &) const uint32_t now = _context ? _context->frameIndex : 0u; for (auto &e : _entries) { - if (e.state == EntryState::Unloaded) + // Both Unloaded and Evicted entries may start (or restart) a load when seen again. + if (e.state == EntryState::Unloaded || e.state == EntryState::Evicted) { // Visibility-driven residency: only start uploads for textures // that were marked used recently (current or previous frame). @@ -201,7 +221,9 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &) // Schedule when first seen (previous frame) or if seen again. recentlyUsed = (now == 0u) || (now - e.lastUsedFrame <= 1u); } - if (recentlyUsed) + // Gate reload attempts on e.nextAttemptFrame to avoid rapid evict/reload oscillation right after eviction. 
+ bool cooldownPassed = (now >= e.nextAttemptFrame); + if (recentlyUsed && cooldownPassed) { enqueue_decode(e); if (++started >= _maxLoadsPerPump) break; @@ -233,12 +255,15 @@ void TextureCache::evictToBudget(size_t budgetBytes) } std::sort(order.begin(), order.end(), [](auto &a, auto &b) { return a.second < b.second; }); + const uint32_t now = _context ? _context->frameIndex : 0u; for (auto &pair : order) { if (_residentBytes <= budgetBytes) break; TextureHandle h = pair.first; Entry &e = _entries[h]; if (e.state != EntryState::Resident) continue; + // Never evict textures used this frame, even if that leaves us over budget. + if (e.lastUsedFrame == now) continue; // Rewrite watchers back to fallback before destroying patch_to_fallback(e); @@ -246,13 +271,15 @@ void TextureCache::evictToBudget(size_t budgetBytes) _context->getResources()->destroy_image(e.image); e.image = {}; e.state = EntryState::Evicted; + e.lastEvictedFrame = now; + e.nextAttemptFrame = std::max(e.nextAttemptFrame, now + _reloadCooldownFrames); if (_residentBytes >= e.sizeBytes) _residentBytes -= e.sizeBytes; else _residentBytes = 0; } } void TextureCache::enqueue_decode(Entry &e) { - if (e.state != EntryState::Unloaded) return; + if (e.state != EntryState::Unloaded && e.state != EntryState::Evicted) return; e.state = EntryState::Loading; DecodeRequest rq{}; rq.handle = static_cast(&e - _entries.data()); @@ -335,17 +362,34 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm) continue; } + const uint32_t now = _context ? _context->frameIndex : 0u; VkExtent3D extent{static_cast(res.width), static_cast(res.height), 1u}; VkFormat fmt = res.srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; - const void *src = nullptr; - if (res.heap) + + // Estimate resident size for admission control (the same value is stored as e.sizeBytes after upload) + const float mipFactor = res.mipmapped ? 
1.3333333f : 1.0f; + const size_t expectedBytes = static_cast(estimate_rgba8_bytes(extent.width, extent.height) * mipFactor); + + if (_gpuBudgetBytes != std::numeric_limits::max()) { - src = static_cast(res.heap); - } - else - { - src = static_cast(res.rgba.data()); + if (_residentBytes + expectedBytes > _gpuBudgetBytes) + { + size_t need = (_residentBytes + expectedBytes) - _gpuBudgetBytes; + (void)try_make_space(need, now); + } + if (_residentBytes + expectedBytes > _gpuBudgetBytes) + { + // Still over budget after eviction: free the decode heap, mark the entry Evicted and retry after the reload cooldown + if (res.heap) { stbi_image_free(res.heap); res.heap = nullptr; } + e.state = EntryState::Evicted; + e.lastEvictedFrame = now; + e.nextAttemptFrame = std::max(e.nextAttemptFrame, now + _reloadCooldownFrames); + continue; + } } + + const void *src = res.heap ? static_cast(res.heap) + : static_cast(res.rgba.data()); e.image = rm.create_image(src, extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped); if (vmaDebugEnabled()) @@ -354,10 +398,10 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm) vmaSetAllocationName(_context->getDevice()->allocator(), e.image.allocation, name.c_str()); } - const float mipFactor = res.mipmapped ? 1.3333333f : 1.0f; - e.sizeBytes = static_cast(estimate_rgba8_bytes(extent.width, extent.height) * mipFactor); + e.sizeBytes = expectedBytes; _residentBytes += e.sizeBytes; e.state = EntryState::Resident; + e.nextAttemptFrame = 0; // clear backoff after success // Drop source bytes if policy says so (only for Bytes-backed keys). 
+ if (!_keepSourceBytes && e.key.kind == TextureKey::SourceKind::Bytes) @@ -411,6 +455,51 @@ void TextureCache::evictCpuToBudget() } } +// Evict least-recently-used Resident entries that were not used in frame 'now' +// until at least 'bytesNeeded' bytes are freed. All-or-nothing: if the candidates +// cannot cover the request, nothing is evicted and false is returned. +bool TextureCache::try_make_space(size_t bytesNeeded, uint32_t now) +{ + if (bytesNeeded == 0) return true; + if (_residentBytes == 0) return false; + + // Collect candidates that were not used this frame, oldest first + std::vector> order; + order.reserve(_entries.size()); + size_t reclaimable = 0; + for (TextureHandle h = 0; h < _entries.size(); ++h) + { + const auto &e = _entries[h]; + if (e.state == EntryState::Resident && e.lastUsedFrame != now) + { + order.emplace_back(h, e.lastUsedFrame); + reclaimable += e.sizeBytes; + } + } + // A partial eviction would drop resident textures while the caller still + // rejects the upload, so give up before touching anything. + if (reclaimable < bytesNeeded) return false; + std::sort(order.begin(), order.end(), [](auto &a, auto &b) { return a.second < b.second; }); + + size_t freed = 0; + for (auto &pair : order) + { + if (freed >= bytesNeeded) break; + Entry &e = _entries[pair.first]; + if (e.state != EntryState::Resident) continue; + + patch_to_fallback(e); + _context->getResources()->destroy_image(e.image); + e.image = {}; + e.state = EntryState::Evicted; + e.lastEvictedFrame = now; + e.nextAttemptFrame = std::max(e.nextAttemptFrame, now + _reloadCooldownFrames); + if (_residentBytes >= e.sizeBytes) _residentBytes -= e.sizeBytes; else _residentBytes = 0; + freed += e.sizeBytes; + } + return freed >= bytesNeeded; +} + void TextureCache::debug_snapshot(std::vector &outRows, DebugStats &outStats) const { outRows.clear(); diff --git a/src/core/texture_cache.h b/src/core/texture_cache.h index f29ff06..0d6ae26 100644 --- a/src/core/texture_cache.h +++ b/src/core/texture_cache.h @@ -10,6 +10,7 @@ #include #include #include +#include class EngineContext; class ResourceManager; @@ -48,6 +49,10 @@ public: void watchBinding(TextureHandle handle, VkDescriptorSet set, uint32_t binding, VkSampler sampler, VkImageView fallbackView); + // Remove all watches for a descriptor set (call before destroying the + // pool that owns the set). Prevents attempts to patch dead sets. 
+ void unwatchSet(VkDescriptorSet set); + // Mark a texture as used this frame (for LRU). void markUsed(TextureHandle handle, uint32_t frameIndex); // Convenience: mark all handles watched by a descriptor set. @@ -96,6 +101,11 @@ public: void set_cpu_source_budget(size_t bytes) { _cpuSourceBudget = bytes; } size_t cpu_source_budget() const { return _cpuSourceBudget; } + // Optional GPU residency budget, used to avoid immediate thrashing when + // accepting new uploads; unlimited until set. The engine should refresh this each frame. + void set_gpu_budget_bytes(size_t bytes) { _gpuBudgetBytes = bytes; } + size_t gpu_budget_bytes() const { return _gpuBudgetBytes; } + private: struct Patch { @@ -115,6 +125,8 @@ private: AllocatedImage image{}; // valid when Resident size_t sizeBytes{0}; // approximate VRAM cost uint32_t lastUsedFrame{0}; + uint32_t lastEvictedFrame{0}; // frame of the last eviction or admission back-off + uint32_t nextAttemptFrame{0}; // earliest frame a reload may start (gates reload attempts to reduce churn) std::vector patches; // descriptor patches to rewrite // Source payload for deferred load @@ -133,6 +145,8 @@ private: int _maxLoadsPerPump{4}; bool _keepSourceBytes{false}; size_t _cpuSourceBudget{64ull * 1024ull * 1024ull}; // 64 MiB default + size_t _gpuBudgetBytes{std::numeric_limits::max()}; // unlimited unless set + uint32_t _reloadCooldownFrames{2}; // frames to wait after an eviction before a reload may start void start_load(Entry &e, ResourceManager &rm); void patch_ready_entry(const Entry &e); @@ -167,6 +181,11 @@ private: void drop_source_bytes(Entry &e); void evictCpuToBudget(); + // Try to free at least 'bytesNeeded' by evicting least-recently-used + // Resident entries that were not used in the current frame. Returns true + // if enough space was reclaimed. Does not evict textures used this frame. 
+ bool try_make_space(size_t bytesNeeded, uint32_t now); + std::vector _decodeThreads; std::mutex _qMutex; std::condition_variable _qCV; diff --git a/src/core/vk_engine.cpp b/src/core/vk_engine.cpp index 0b271fd..b2a63d8 100644 --- a/src/core/vk_engine.cpp +++ b/src/core/vk_engine.cpp @@ -173,6 +173,7 @@ namespace { } const size_t texBudget = query_texture_budget_bytes(dev); + eng->_textureCache->set_gpu_budget_bytes(texBudget); const size_t resBytes = eng->_textureCache->resident_bytes(); const size_t cpuSrcBytes = eng->_textureCache->cpu_source_bytes(); ImGui::Text("Device local: %.1f / %.1f MiB", (double)devLocalUsage/1048576.0, (double)devLocalBudget/1048576.0); @@ -560,7 +561,7 @@ void VulkanEngine::init() // Conservative defaults to avoid CPU spikes during heavy glTF loads. _textureCache->set_max_loads_per_pump(3); _textureCache->set_keep_source_bytes(false); - _textureCache->set_cpu_source_budget(32ull * 1024ull * 1024ull); // 32 MiB + _textureCache->set_cpu_source_budget(64ull * 1024ull * 1024ull); // 64 MiB // Optional ray tracing manager if supported and extensions enabled if (_deviceManager->supportsRayQuery() && _deviceManager->supportsAccelerationStructure()) @@ -598,7 +599,7 @@ void VulkanEngine::init() auto imguiPass = std::make_unique(); _renderPassManager->setImGuiPass(std::move(imguiPass)); - const std::string structurePath = _assetManager->modelPath("Untitled.glb"); + const std::string structurePath = _assetManager->modelPath("seoul_high/scene.gltf"); const auto structureFile = _assetManager->loadGLTF(structurePath); assert(structureFile.has_value()); @@ -864,6 +865,7 @@ void VulkanEngine::draw() if (_textureCache) { size_t budget = query_texture_budget_bytes(_deviceManager.get()); + _textureCache->set_gpu_budget_bytes(budget); _textureCache->evictToBudget(budget); _textureCache->pumpLoads(*_resourceManager, get_current_frame()); } diff --git a/src/scene/vk_loader.cpp b/src/scene/vk_loader.cpp index b0a2c45..4563927 100644 --- 
a/src/scene/vk_loader.cpp +++ b/src/scene/vk_loader.cpp @@ -704,6 +704,20 @@ void LoadedGLTF::clearAll() { VkDevice dv = creator->_deviceManager->device(); + // Before destroying descriptor pools, unregister descriptor-set watches so + // the TextureCache will not attempt to patch dead sets. NOTE(review): creator is already dereferenced above, so the creator null test below looks redundant. + if (creator && creator->_context && creator->_context->textures) + { + TextureCache *cache = creator->_context->textures; + for (auto &[k, mat] : materials) + { + if (mat && mat->data.materialSet != VK_NULL_HANDLE) + { + cache->unwatchSet(mat->data.materialSet); + } + } + } + for (auto &[k, v]: meshes) { if (creator->_rayManager)