diff --git a/src/core/asset_manager.cpp b/src/core/asset_manager.cpp index 86b120c..1575e99 100644 --- a/src/core/asset_manager.cpp +++ b/src/core/asset_manager.cpp @@ -11,6 +11,9 @@ #include #include "asset_locator.h" #include +#include +#include +#include using std::filesystem::path; @@ -227,6 +230,116 @@ std::shared_ptr AssetManager::createMesh(const MeshCreateInfo &info) return mesh; } +size_t AssetManager::prefetchGLTFTextures(std::string_view nameOrPath) +{ + if (!_engine || !_engine->_context || !_engine->_context->textures) return 0; + if (nameOrPath.empty()) return 0; + + std::string resolved = assetPath(nameOrPath); + std::filesystem::path path = resolved; + + fastgltf::Parser parser{}; + constexpr auto gltfOptions = fastgltf::Options::DontRequireValidAssetMember | fastgltf::Options::AllowDouble | + fastgltf::Options::LoadGLBBuffers | fastgltf::Options::LoadExternalBuffers; + fastgltf::GltfDataBuffer data; + if (!data.loadFromFile(path)) return 0; + + fastgltf::Asset gltf; + size_t scheduled = 0; + + auto type = fastgltf::determineGltfFileType(&data); + if (type == fastgltf::GltfType::glTF) + { + auto load = parser.loadGLTF(&data, path.parent_path(), gltfOptions); + if (load) gltf = std::move(load.get()); else return 0; + } + else if (type == fastgltf::GltfType::GLB) + { + auto load = parser.loadBinaryGLTF(&data, path.parent_path(), gltfOptions); + if (load) gltf = std::move(load.get()); else return 0; + } + else + { + return 0; + } + + TextureCache *cache = _engine->_context->textures; + + auto enqueueTex = [&](size_t imgIndex, bool srgb) + { + if (imgIndex >= gltf.images.size()) return; + TextureCache::TextureKey key{}; + key.srgb = srgb; + key.mipmapped = true; + + fastgltf::Image &image = gltf.images[imgIndex]; + std::visit(fastgltf::visitor{ + [&](fastgltf::sources::URI &filePath) + { + const std::string p(filePath.uri.path().begin(), filePath.uri.path().end()); + key.kind = TextureCache::TextureKey::SourceKind::FilePath; + key.path = p; + std::string id = std::string("GLTF-PREF:") + p + (srgb ? "#sRGB" : "#UNORM"); + key.hash = texcache::fnv1a64(id); + }, + [&](fastgltf::sources::Vector &vector) + { + key.kind = TextureCache::TextureKey::SourceKind::Bytes; + key.bytes.assign(vector.bytes.begin(), vector.bytes.end()); + uint64_t h = texcache::fnv1a64(key.bytes.data(), key.bytes.size()); + key.hash = h ^ (srgb ? 0x9E3779B97F4A7C15ull : 0ull); + }, + [&](fastgltf::sources::BufferView &view) + { + auto &bufferView = gltf.bufferViews[view.bufferViewIndex]; + auto &buffer = gltf.buffers[bufferView.bufferIndex]; + std::visit(fastgltf::visitor{ + [](auto &arg) {}, + [&](fastgltf::sources::Vector &vec) + { + size_t off = bufferView.byteOffset; + size_t len = bufferView.byteLength; + key.kind = TextureCache::TextureKey::SourceKind::Bytes; + key.bytes.assign(vec.bytes.begin() + off, vec.bytes.begin() + off + len); + uint64_t h = texcache::fnv1a64(key.bytes.data(), key.bytes.size()); + key.hash = h ^ (srgb ? 0x9E3779B97F4A7C15ull : 0ull); + } + }, buffer.data); + }, + [](auto &other) {} + }, image.data); + + if (key.hash != 0) + { + VkSampler samp = _engine->_samplerManager->defaultLinear(); + cache->request(key, samp); + scheduled++; + } + }; + + for (const auto &tex : gltf.textures) + { + if (tex.imageIndex.has_value()) + { + // For baseColor we prefer sRGB; other maps requested later will reuse entry + enqueueTex(tex.imageIndex.value(), true); + } + } + + // Proactively free big buffer vectors we no longer need. + for (auto &buf : gltf.buffers) + { + std::visit(fastgltf::visitor{ + [](auto &arg) {}, + [&](fastgltf::sources::Vector &vec) { + std::vector().swap(vec.bytes); + } + }, buf.data); + } + + return scheduled; +} + static Bounds compute_bounds(std::span vertices) { Bounds b{}; diff --git a/src/core/asset_manager.h b/src/core/asset_manager.h index 6114ffa..6a2f439 100644 --- a/src/core/asset_manager.h +++ b/src/core/asset_manager.h @@ -76,6 +76,13 @@ public: std::optional > loadGLTF(std::string_view nameOrPath); + // Queue texture loads for a glTF file ahead of time. This parses the glTF, + // builds TextureCache keys for referenced images (both external URIs and + // embedded images in buffers), and issues TextureCache::request() calls. + // Actual uploads happen via the normal per-frame pump. + // Returns number of textures scheduled. + size_t prefetchGLTFTextures(std::string_view nameOrPath); + std::shared_ptr createMesh(const MeshCreateInfo &info); std::shared_ptr getPrimitive(std::string_view name) const; diff --git a/src/core/texture_cache.cpp b/src/core/texture_cache.cpp index e373c23..d08a678 100644 --- a/src/core/texture_cache.cpp +++ b/src/core/texture_cache.cpp @@ -94,6 +94,7 @@ TextureCache::TextureHandle TextureCache::request(const TextureKey &key, VkSampl else { e.bytes = normKey.bytes; + _cpuSourceBytes += e.bytes.size(); } _entries.push_back(std::move(e)); return h; @@ -185,7 +186,6 @@ void TextureCache::patch_to_fallback(const Entry &e) void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &) { // Simple throttle to avoid massive spikes. - const int kMaxLoadsPerPump = 4; int started = 0; const uint32_t now = _context ? _context->frameIndex : 0u; for (auto &e : _entries) @@ -204,13 +204,16 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &) if (recentlyUsed) { enqueue_decode(e); - if (++started >= kMaxLoadsPerPump) break; + if (++started >= _maxLoadsPerPump) break; } } } // Drain decoded results and enqueue GPU uploads. drain_ready_uploads(rm); + + // Optionally trim retained compressed sources to CPU budget. + evictCpuToBudget(); } void TextureCache::evictToBudget(size_t budgetBytes) @@ -298,11 +301,13 @@ void TextureCache::worker_loop() out.srgb = rq.key.srgb; if (data && w > 0 && h > 0) { - size_t sz = static_cast(w) * static_cast(h) * 4u; - out.rgba.resize(sz); - memcpy(out.rgba.data(), data, sz); + out.heap = data; + out.heapBytes = static_cast(w) * static_cast(h) * 4u; + } + else if (data) + { + stbi_image_free(data); } - if (data) stbi_image_free(data); { std::lock_guard lk(_readyMutex); @@ -324,7 +329,7 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm) { if (res.handle == InvalidHandle || res.handle >= _entries.size()) continue; Entry &e = _entries[res.handle]; - if (res.rgba.empty() || res.width <= 0 || res.height <= 0) + if ((res.heap == nullptr && res.rgba.empty()) || res.width <= 0 || res.height <= 0) { e.state = EntryState::Evicted; // failed decode; keep fallback continue; @@ -332,8 +337,16 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm) VkExtent3D extent{static_cast(res.width), static_cast(res.height), 1u}; VkFormat fmt = res.srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; - e.image = rm.create_image(static_cast(res.rgba.data()), extent, fmt, - VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped); + const void *src = nullptr; + if (res.heap) + { + src = static_cast(res.heap); + } + else + { + src = static_cast(res.rgba.data()); + } + e.image = rm.create_image(src, extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped); if (vmaDebugEnabled()) { @@ -346,11 +359,58 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm) _residentBytes += e.sizeBytes; e.state = EntryState::Resident; + // Drop source bytes if policy says so (only for Bytes-backed keys). + if (!_keepSourceBytes && e.key.kind == TextureKey::SourceKind::Bytes) + { + drop_source_bytes(e); + } + + // Free temporary decode heap if present + if (res.heap) + { + stbi_image_free(res.heap); + } + // Patch descriptors now; data becomes valid before sampling due to RG upload pass patch_ready_entry(e); } } +void TextureCache::drop_source_bytes(Entry &e) +{ + if (e.bytes.empty()) return; + if (e.key.kind != TextureKey::SourceKind::Bytes) return; + if (_cpuSourceBytes >= e.bytes.size()) _cpuSourceBytes -= e.bytes.size(); + e.bytes.clear(); + e.bytes.shrink_to_fit(); + e.path.clear(); +} + +void TextureCache::evictCpuToBudget() +{ + if (_cpuSourceBytes <= _cpuSourceBudget) return; + // Collect candidates: Resident entries with retained bytes + std::vector cands; + cands.reserve(_entries.size()); + for (TextureHandle h = 0; h < _entries.size(); ++h) + { + const Entry &e = _entries[h]; + if (e.state == EntryState::Resident && !e.bytes.empty() && e.key.kind == TextureKey::SourceKind::Bytes) + { + cands.push_back(h); + } + } + // LRU-ish: sort by lastUsed ascending + std::sort(cands.begin(), cands.end(), [&](TextureHandle a, TextureHandle b){ + return _entries[a].lastUsedFrame < _entries[b].lastUsedFrame; + }); + for (TextureHandle h : cands) + { + if (_cpuSourceBytes <= _cpuSourceBudget) break; + drop_source_bytes(_entries[h]); + } +} + void TextureCache::debug_snapshot(std::vector &outRows, DebugStats &outStats) const { outRows.clear(); diff --git a/src/core/texture_cache.h b/src/core/texture_cache.h index 2809a60..f29ff06 100644 --- a/src/core/texture_cache.h +++ b/src/core/texture_cache.h @@ -76,6 +76,25 @@ public: }; void debug_snapshot(std::vector& outRows, DebugStats& outStats) const; size_t resident_bytes() const { return _residentBytes; } + // CPU-side source bytes currently retained (compressed image payloads kept + // for potential re-decode). Only applies to entries created with Bytes keys. + size_t cpu_source_bytes() const { return _cpuSourceBytes; } + + // Runtime controls + void set_max_loads_per_pump(int n) { _maxLoadsPerPump = (n > 0) ? n : 1; } + int max_loads_per_pump() const { return _maxLoadsPerPump; } + + // If false (default), compressed source bytes are dropped once an image is + // uploaded to the GPU and descriptors patched. Set true to retain sources + // for potential re-decode after eviction. + void set_keep_source_bytes(bool keep) { _keepSourceBytes = keep; } + bool keep_source_bytes() const { return _keepSourceBytes; } + + // Set a soft CPU budget (in bytes) for retained compressed sources. After + // each upload drain, the cache will try to free source bytes for Resident + // entries until under budget. + void set_cpu_source_budget(size_t bytes) { _cpuSourceBudget = bytes; } + size_t cpu_source_budget() const { return _cpuSourceBudget; } private: struct Patch @@ -108,6 +127,12 @@ private: std::unordered_map _lookup; // key.hash -> handle std::unordered_map> _setToHandles; size_t _residentBytes{0}; + size_t _cpuSourceBytes{0}; + + // Controls + int _maxLoadsPerPump{4}; + bool _keepSourceBytes{false}; + size_t _cpuSourceBudget{64ull * 1024ull * 1024ull}; // 64 MiB default void start_load(Entry &e, ResourceManager &rm); void patch_ready_entry(const Entry &e); @@ -126,6 +151,11 @@ private: TextureHandle handle{InvalidHandle}; int width{0}; int height{0}; + // Prefer heap pointer from stb to avoid an extra memcpy into a vector. + // If 'heap' is non-null, it must be freed with stbi_image_free() after + // the upload has copied the data. 'rgba' remains as a fallback path. + unsigned char *heap{nullptr}; + size_t heapBytes{0}; std::vector rgba; bool mipmapped{true}; bool srgb{false}; @@ -134,6 +164,8 @@ private: void worker_loop(); void enqueue_decode(Entry &e); void drain_ready_uploads(ResourceManager &rm); + void drop_source_bytes(Entry &e); + void evictCpuToBudget(); std::vector _decodeThreads; std::mutex _qMutex; diff --git a/src/core/vk_engine.cpp b/src/core/vk_engine.cpp index 2b224ea..0b271fd 100644 --- a/src/core/vk_engine.cpp +++ b/src/core/vk_engine.cpp @@ -174,15 +174,37 @@ namespace { const size_t texBudget = query_texture_budget_bytes(dev); const size_t resBytes = eng->_textureCache->resident_bytes(); + const size_t cpuSrcBytes = eng->_textureCache->cpu_source_bytes(); ImGui::Text("Device local: %.1f / %.1f MiB", (double)devLocalUsage/1048576.0, (double)devLocalBudget/1048576.0); ImGui::Text("Texture budget: %.1f MiB", (double)texBudget/1048576.0); ImGui::Text("Resident textures: %.1f MiB", (double)resBytes/1048576.0); + ImGui::Text("CPU source bytes: %.1f MiB", (double)cpuSrcBytes/1048576.0); ImGui::SameLine(); if (ImGui::Button("Trim To Budget Now")) { eng->_textureCache->evictToBudget(texBudget); } + // Controls + static int loadsPerPump = 4; + loadsPerPump = eng->_textureCache->max_loads_per_pump(); + if (ImGui::SliderInt("Loads/Frame", &loadsPerPump, 1, 16)) + { + eng->_textureCache->set_max_loads_per_pump(loadsPerPump); + } + static bool keepSources = false; + keepSources = eng->_textureCache->keep_source_bytes(); + if (ImGui::Checkbox("Keep Source Bytes", &keepSources)) + { + eng->_textureCache->set_keep_source_bytes(keepSources); + } + static int cpuBudgetMiB = 64; + cpuBudgetMiB = (int)(eng->_textureCache->cpu_source_budget() / 1048576ull); + if (ImGui::SliderInt("CPU Source Budget (MiB)", &cpuBudgetMiB, 0, 2048)) + { + eng->_textureCache->set_cpu_source_budget((size_t)cpuBudgetMiB * 1048576ull); + } + TextureCache::DebugStats stats{}; std::vector rows; eng->_textureCache->debug_snapshot(rows, stats); @@ -535,6 +557,10 @@ void VulkanEngine::init() _textureCache = std::make_unique(); _textureCache->init(_context.get()); _context->textures = _textureCache.get(); + // Conservative defaults to avoid CPU spikes during heavy glTF loads. + _textureCache->set_max_loads_per_pump(3); + _textureCache->set_keep_source_bytes(false); + _textureCache->set_cpu_source_budget(32ull * 1024ull * 1024ull); // 32 MiB // Optional ray tracing manager if supported and extensions enabled if (_deviceManager->supportsRayQuery() && _deviceManager->supportsAccelerationStructure()) diff --git a/src/scene/vk_loader.cpp b/src/scene/vk_loader.cpp index 2576a74..b0a2c45 100644 --- a/src/scene/vk_loader.cpp +++ b/src/scene/vk_loader.cpp @@ -265,6 +265,9 @@ std::optional > loadGltf(VulkanEngine *engine, std:: //< load_arrays // Note: glTF images are now loaded on-demand via TextureCache. + // Resolve external image paths relative to the source glTF file directory + // to avoid failing to find textures when running from a different CWD. + const std::filesystem::path baseDir = path.parent_path(); auto buildTextureKey = [&](size_t imgIndex, bool srgb) -> TextureCache::TextureKey { TextureCache::TextureKey key{}; @@ -279,10 +282,16 @@ std::optional > loadGltf(VulkanEngine *engine, std:: std::visit(fastgltf::visitor{ [&](fastgltf::sources::URI &filePath) { - const std::string path(filePath.uri.path().begin(), filePath.uri.path().end()); + const std::string rel(filePath.uri.path().begin(), filePath.uri.path().end()); + // Build an absolute (or at least baseDir-resolved) path for IO + stable keying + std::filesystem::path resolved = std::filesystem::path(rel); + if (resolved.is_relative()) + { + resolved = baseDir / resolved; + } key.kind = TextureCache::TextureKey::SourceKind::FilePath; - key.path = path; - std::string id = std::string("GLTF:") + path + (srgb ? "#sRGB" : "#UNORM"); + key.path = resolved.string(); + std::string id = std::string("GLTF:") + key.path + (srgb ? "#sRGB" : "#UNORM"); key.hash = texcache::fnv1a64(id); }, [&](fastgltf::sources::Vector &vector) @@ -577,6 +586,19 @@ std::optional > loadGltf(VulkanEngine *engine, std:: } newmesh->meshBuffers = engine->_resourceManager->uploadMesh(indices, vertices); + // If CPU vectors ballooned for this mesh, release capacity back to the OS + auto shrink_if_huge = [](auto &vec, size_t elemSizeBytes) { + const size_t capBytes = vec.capacity() * elemSizeBytes; + const size_t kThreshold = 64ull * 1024ull * 1024ull; // 64 MiB + if (capBytes > kThreshold) + { + using Vec = std::remove_reference_t; + Vec empty; + vec.swap(empty); + } + }; + shrink_if_huge(indices, sizeof(uint32_t)); + shrink_if_huge(vertices, sizeof(Vertex)); if (engine->_rayManager) { engine->_rayManager->getOrBuildBLAS(newmesh); @@ -646,6 +668,25 @@ std::optional > loadGltf(VulkanEngine *engine, std:: node->refreshTransform(glm::mat4{1.f}); } } + // We no longer need glTF-owned buffer payloads; free any large vectors + for (auto &buf : gltf.buffers) + { + std::visit(fastgltf::visitor{ + [](auto &arg) {}, + [&](fastgltf::sources::Vector &vec) { + std::vector().swap(vec.bytes); + } + }, buf.data); + } + for (auto &img : gltf.images) + { + std::visit(fastgltf::visitor{ + [](auto &arg) {}, + [&](fastgltf::sources::Vector &vec) { + std::vector().swap(vec.bytes); + } + }, img.data); + } return scene; //< load_graph }