ADD: texture prefetch

2025-11-06 14:36:03 +09:00
parent dad6db971b
commit 6be30e98b4
6 changed files with 291 additions and 12 deletions

View File

@@ -11,6 +11,9 @@
#include <stb_image.h>
#include "asset_locator.h"
#include <core/texture_cache.h>
#include <fastgltf/parser.hpp>
#include <fastgltf/util.hpp>
#include <fastgltf/tools.hpp>
using std::filesystem::path;
@@ -227,6 +230,116 @@ std::shared_ptr<MeshAsset> AssetManager::createMesh(const MeshCreateInfo &info)
return mesh;
}
size_t AssetManager::prefetchGLTFTextures(std::string_view nameOrPath)
{
if (!_engine || !_engine->_context || !_engine->_context->textures) return 0;
if (nameOrPath.empty()) return 0;
std::string resolved = assetPath(nameOrPath);
std::filesystem::path path = resolved;
fastgltf::Parser parser{};
constexpr auto gltfOptions = fastgltf::Options::DontRequireValidAssetMember | fastgltf::Options::AllowDouble |
fastgltf::Options::LoadGLBBuffers | fastgltf::Options::LoadExternalBuffers;
fastgltf::GltfDataBuffer data;
if (!data.loadFromFile(path)) return 0;
fastgltf::Asset gltf;
size_t scheduled = 0;
auto type = fastgltf::determineGltfFileType(&data);
if (type == fastgltf::GltfType::glTF)
{
auto load = parser.loadGLTF(&data, path.parent_path(), gltfOptions);
if (load) gltf = std::move(load.get()); else return 0;
}
else if (type == fastgltf::GltfType::GLB)
{
auto load = parser.loadBinaryGLTF(&data, path.parent_path(), gltfOptions);
if (load) gltf = std::move(load.get()); else return 0;
}
else
{
return 0;
}
TextureCache *cache = _engine->_context->textures;
auto enqueueTex = [&](size_t imgIndex, bool srgb)
{
if (imgIndex >= gltf.images.size()) return;
TextureCache::TextureKey key{};
key.srgb = srgb;
key.mipmapped = true;
fastgltf::Image &image = gltf.images[imgIndex];
std::visit(fastgltf::visitor{
[&](fastgltf::sources::URI &filePath)
{
const std::string rel(filePath.uri.path().begin(), filePath.uri.path().end());
// Resolve relative URIs against the glTF directory and build the same
// "GLTF:" id that loadGltf hashes, so the real load finds and reuses the
// entry this prefetch created instead of decoding the image a second time.
std::filesystem::path full = rel;
if (full.is_relative()) full = path.parent_path() / full;
key.kind = TextureCache::TextureKey::SourceKind::FilePath;
key.path = full.string();
std::string id = std::string("GLTF:") + key.path + (srgb ? "#sRGB" : "#UNORM");
key.hash = texcache::fnv1a64(id);
},
[&](fastgltf::sources::Vector &vector)
{
key.kind = TextureCache::TextureKey::SourceKind::Bytes;
key.bytes.assign(vector.bytes.begin(), vector.bytes.end());
uint64_t h = texcache::fnv1a64(key.bytes.data(), key.bytes.size());
key.hash = h ^ (srgb ? 0x9E3779B97F4A7C15ull : 0ull);
},
[&](fastgltf::sources::BufferView &view)
{
auto &bufferView = gltf.bufferViews[view.bufferViewIndex];
auto &buffer = gltf.buffers[bufferView.bufferIndex];
std::visit(fastgltf::visitor{
[](auto &arg) {},
[&](fastgltf::sources::Vector &vec)
{
size_t off = bufferView.byteOffset;
size_t len = bufferView.byteLength;
key.kind = TextureCache::TextureKey::SourceKind::Bytes;
key.bytes.assign(vec.bytes.begin() + off, vec.bytes.begin() + off + len);
uint64_t h = texcache::fnv1a64(key.bytes.data(), key.bytes.size());
key.hash = h ^ (srgb ? 0x9E3779B97F4A7C15ull : 0ull);
}
}, buffer.data);
},
[](auto &other) {}
}, image.data);
if (key.hash != 0)
{
VkSampler samp = _engine->_samplerManager->defaultLinear();
cache->request(key, samp);
scheduled++;
}
};
for (const auto &tex : gltf.textures)
{
if (tex.imageIndex.has_value())
{
// Prefetch as sRGB (the baseColor convention); a later request with the same key reuses this entry.
enqueueTex(tex.imageIndex.value(), true);
}
}
// Proactively free big buffer vectors we no longer need.
for (auto &buf : gltf.buffers)
{
std::visit(fastgltf::visitor{
[](auto &arg) {},
[&](fastgltf::sources::Vector &vec) {
std::vector<uint8_t>().swap(vec.bytes);
}
}, buf.data);
}
return scheduled;
}
static Bounds compute_bounds(std::span<Vertex> vertices)
{
Bounds b{};

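Both key-building sites above deduplicate purely through key.hash, so they must hash byte-identical id strings. For reference, a minimal sketch of the 64-bit FNV-1a loop these ids are assumed to go through; texcache::fnv1a64 itself is defined outside this diff, and the path below is illustrative:

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <string_view>

// Minimal FNV-1a 64 sketch, assumed to match texcache::fnv1a64.
// Uses the standard 64-bit offset basis and prime.
constexpr uint64_t fnv1a64(std::string_view s)
{
    uint64_t h = 14695981039346656037ull; // FNV-1a 64-bit offset basis
    for (unsigned char c : s)
    {
        h ^= c;                // xor the byte in first...
        h *= 1099511628211ull; // ...then multiply by the FNV prime
    }
    return h;
}

int main()
{
    // The '#sRGB' / '#UNORM' suffix keeps color and data maps as distinct cache keys.
    std::printf("%016" PRIx64 "\n", fnv1a64("GLTF:assets/wood.png#sRGB"));
    std::printf("%016" PRIx64 "\n", fnv1a64("GLTF:assets/wood.png#UNORM"));
}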
View File

@@ -76,6 +76,13 @@ public:
std::optional<std::shared_ptr<LoadedGLTF> > loadGLTF(std::string_view nameOrPath);
// Queue texture loads for a glTF file ahead of time. This parses the glTF,
// builds TextureCache keys for referenced images (both external URIs and
// embedded images in buffers), and issues TextureCache::request() calls.
// Actual uploads happen via the normal per-frame pump.
// Returns number of textures scheduled.
size_t prefetchGLTFTextures(std::string_view nameOrPath);
std::shared_ptr<MeshAsset> createMesh(const MeshCreateInfo &info);
std::shared_ptr<MeshAsset> getPrimitive(std::string_view name) const;
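For context, a hypothetical call site for this API; the engine->_assetManager wiring and the scene path are illustrative assumptions, not names taken from this commit:

// Hypothetical usage sketch -- 'engine->_assetManager' and the path are
// placeholders; only prefetchGLTFTextures()/loadGLTF() come from this commit.
size_t scheduled = engine->_assetManager->prefetchGLTFTextures("scenes/city.glb");
std::printf("prefetch scheduled %zu texture(s)\n", scheduled);
// ...some frames later, after the per-frame pump has decoded and uploaded,
// the real load finds the cache entries already warm:
auto scene = engine->_assetManager->loadGLTF("scenes/city.glb");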

View File

@@ -94,6 +94,7 @@ TextureCache::TextureHandle TextureCache::request(const TextureKey &key, VkSampl
else
{
e.bytes = normKey.bytes;
_cpuSourceBytes += e.bytes.size();
}
_entries.push_back(std::move(e));
return h;
@@ -185,7 +186,6 @@ void TextureCache::patch_to_fallback(const Entry &e)
void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &)
{
// Simple throttle to avoid massive spikes.
- const int kMaxLoadsPerPump = 4;
int started = 0;
const uint32_t now = _context ? _context->frameIndex : 0u;
for (auto &e : _entries)
@@ -204,13 +204,16 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &)
if (recentlyUsed)
{
enqueue_decode(e);
- if (++started >= kMaxLoadsPerPump) break;
+ if (++started >= _maxLoadsPerPump) break;
}
}
}
// Drain decoded results and enqueue GPU uploads.
drain_ready_uploads(rm);
// Optionally trim retained compressed sources to CPU budget.
evictCpuToBudget();
}
void TextureCache::evictToBudget(size_t budgetBytes)
@@ -298,11 +301,13 @@ void TextureCache::worker_loop()
out.srgb = rq.key.srgb;
if (data && w > 0 && h > 0)
{
- size_t sz = static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
- out.rgba.resize(sz);
- memcpy(out.rgba.data(), data, sz);
+ out.heap = data;
+ out.heapBytes = static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
}
+ else if (data)
+ {
+ stbi_image_free(data);
+ }
- if (data) stbi_image_free(data);
{
std::lock_guard<std::mutex> lk(_readyMutex);
@@ -324,7 +329,7 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
{
if (res.handle == InvalidHandle || res.handle >= _entries.size()) continue;
Entry &e = _entries[res.handle];
- if (res.rgba.empty() || res.width <= 0 || res.height <= 0)
+ if ((res.heap == nullptr && res.rgba.empty()) || res.width <= 0 || res.height <= 0)
{
e.state = EntryState::Evicted; // failed decode; keep fallback
continue;
@@ -332,8 +337,16 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
VkExtent3D extent{static_cast<uint32_t>(res.width), static_cast<uint32_t>(res.height), 1u};
VkFormat fmt = res.srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
- e.image = rm.create_image(static_cast<void *>(res.rgba.data()), extent, fmt,
- VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped);
+ const void *src = res.heap ? static_cast<const void *>(res.heap)
+ : static_cast<const void *>(res.rgba.data());
+ e.image = rm.create_image(src, extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped);
if (vmaDebugEnabled())
{
@@ -346,11 +359,58 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
_residentBytes += e.sizeBytes;
e.state = EntryState::Resident;
// Drop source bytes if policy says so (only for Bytes-backed keys).
if (!_keepSourceBytes && e.key.kind == TextureKey::SourceKind::Bytes)
{
drop_source_bytes(e);
}
// Free temporary decode heap if present
if (res.heap)
{
stbi_image_free(res.heap);
}
// Patch descriptors now; the data becomes valid before sampling via the render-graph upload pass
patch_ready_entry(e);
}
}
void TextureCache::drop_source_bytes(Entry &e)
{
if (e.bytes.empty()) return;
if (e.key.kind != TextureKey::SourceKind::Bytes) return;
// Clamp instead of skipping the subtraction so a bookkeeping mismatch
// can never leave _cpuSourceBytes permanently inflated.
_cpuSourceBytes -= std::min(_cpuSourceBytes, e.bytes.size());
e.bytes.clear();
e.bytes.shrink_to_fit();
e.path.clear();
}
void TextureCache::evictCpuToBudget()
{
if (_cpuSourceBytes <= _cpuSourceBudget) return;
// Collect candidates: Resident entries with retained bytes
std::vector<TextureHandle> cands;
cands.reserve(_entries.size());
for (TextureHandle h = 0; h < _entries.size(); ++h)
{
const Entry &e = _entries[h];
if (e.state == EntryState::Resident && !e.bytes.empty() && e.key.kind == TextureKey::SourceKind::Bytes)
{
cands.push_back(h);
}
}
// LRU-ish: sort by lastUsed ascending
std::sort(cands.begin(), cands.end(), [&](TextureHandle a, TextureHandle b){
return _entries[a].lastUsedFrame < _entries[b].lastUsedFrame;
});
for (TextureHandle h : cands)
{
if (_cpuSourceBytes <= _cpuSourceBudget) break;
drop_source_bytes(_entries[h]);
}
}
void TextureCache::debug_snapshot(std::vector<DebugRow> &outRows, DebugStats &outStats) const
{
outRows.clear();

View File

@@ -76,6 +76,25 @@ public:
};
void debug_snapshot(std::vector<DebugRow>& outRows, DebugStats& outStats) const;
size_t resident_bytes() const { return _residentBytes; }
// CPU-side source bytes currently retained (compressed image payloads kept
// for potential re-decode). Only applies to entries created with Bytes keys.
size_t cpu_source_bytes() const { return _cpuSourceBytes; }
// Runtime controls
void set_max_loads_per_pump(int n) { _maxLoadsPerPump = (n > 0) ? n : 1; }
int max_loads_per_pump() const { return _maxLoadsPerPump; }
// If false (default), compressed source bytes are dropped once an image is
// uploaded to the GPU and descriptors patched. Set true to retain sources
// for potential re-decode after eviction.
void set_keep_source_bytes(bool keep) { _keepSourceBytes = keep; }
bool keep_source_bytes() const { return _keepSourceBytes; }
// Set a soft CPU budget (in bytes) for retained compressed sources. After
// each upload drain, the cache will try to free source bytes for Resident
// entries until under budget.
void set_cpu_source_budget(size_t bytes) { _cpuSourceBudget = bytes; }
size_t cpu_source_budget() const { return _cpuSourceBudget; }
private:
struct Patch
@@ -108,6 +127,12 @@ private:
std::unordered_map<uint64_t, TextureHandle> _lookup; // key.hash -> handle
std::unordered_map<VkDescriptorSet, std::vector<TextureHandle>> _setToHandles;
size_t _residentBytes{0};
size_t _cpuSourceBytes{0};
// Controls
int _maxLoadsPerPump{4};
bool _keepSourceBytes{false};
size_t _cpuSourceBudget{64ull * 1024ull * 1024ull}; // 64 MiB default
void start_load(Entry &e, ResourceManager &rm);
void patch_ready_entry(const Entry &e);
@@ -126,6 +151,11 @@ private:
TextureHandle handle{InvalidHandle};
int width{0};
int height{0};
// Prefer heap pointer from stb to avoid an extra memcpy into a vector.
// If 'heap' is non-null, it must be freed with stbi_image_free() after
// the upload has copied the data. 'rgba' remains as a fallback path.
unsigned char *heap{nullptr};
size_t heapBytes{0};
std::vector<uint8_t> rgba;
bool mipmapped{true};
bool srgb{false};
@@ -134,6 +164,8 @@ private:
void worker_loop();
void enqueue_decode(Entry &e);
void drain_ready_uploads(ResourceManager &rm);
void drop_source_bytes(Entry &e);
void evictCpuToBudget();
std::vector<std::thread> _decodeThreads;
std::mutex _qMutex;
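Taken together, the new knobs allow per-platform tuning. A sketch for a memory-constrained target follows; the values are examples only (init() later in this commit sets its own defaults), and the accessor is the one the debug UI already uses:

// Illustrative tuning for a low-memory target; values are examples only.
TextureCache &tc = *engine->_textureCache;
tc.set_max_loads_per_pump(2);                        // kick at most 2 decodes per pump
tc.set_keep_source_bytes(true);                      // retain compressed sources for re-decode
tc.set_cpu_source_budget(16ull * 1024ull * 1024ull); // ...but trim retained sources to 16 MiB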

View File

@@ -174,15 +174,37 @@ namespace {
const size_t texBudget = query_texture_budget_bytes(dev);
const size_t resBytes = eng->_textureCache->resident_bytes();
const size_t cpuSrcBytes = eng->_textureCache->cpu_source_bytes();
ImGui::Text("Device local: %.1f / %.1f MiB", (double)devLocalUsage/1048576.0, (double)devLocalBudget/1048576.0);
ImGui::Text("Texture budget: %.1f MiB", (double)texBudget/1048576.0);
ImGui::Text("Resident textures: %.1f MiB", (double)resBytes/1048576.0);
ImGui::Text("CPU source bytes: %.1f MiB", (double)cpuSrcBytes/1048576.0);
ImGui::SameLine();
if (ImGui::Button("Trim To Budget Now"))
{
eng->_textureCache->evictToBudget(texBudget);
}
// Controls
int loadsPerPump = eng->_textureCache->max_loads_per_pump();
if (ImGui::SliderInt("Loads/Frame", &loadsPerPump, 1, 16))
{
eng->_textureCache->set_max_loads_per_pump(loadsPerPump);
}
bool keepSources = eng->_textureCache->keep_source_bytes();
if (ImGui::Checkbox("Keep Source Bytes", &keepSources))
{
eng->_textureCache->set_keep_source_bytes(keepSources);
}
int cpuBudgetMiB = (int)(eng->_textureCache->cpu_source_budget() / 1048576ull);
if (ImGui::SliderInt("CPU Source Budget (MiB)", &cpuBudgetMiB, 0, 2048))
{
eng->_textureCache->set_cpu_source_budget((size_t)cpuBudgetMiB * 1048576ull);
}
TextureCache::DebugStats stats{};
std::vector<TextureCache::DebugRow> rows;
eng->_textureCache->debug_snapshot(rows, stats);
@@ -535,6 +557,10 @@ void VulkanEngine::init()
_textureCache = std::make_unique<TextureCache>();
_textureCache->init(_context.get());
_context->textures = _textureCache.get();
// Conservative defaults to avoid CPU spikes during heavy glTF loads.
_textureCache->set_max_loads_per_pump(3);
_textureCache->set_keep_source_bytes(false);
_textureCache->set_cpu_source_budget(32ull * 1024ull * 1024ull); // 32 MiB
// Optional ray tracing manager if supported and extensions enabled
if (_deviceManager->supportsRayQuery() && _deviceManager->supportsAccelerationStructure())

View File

@@ -265,6 +265,9 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
//< load_arrays
// Note: glTF images are now loaded on-demand via TextureCache.
// Resolve external image paths relative to the source glTF file directory
// to avoid failing to find textures when running from a different CWD.
const std::filesystem::path baseDir = path.parent_path();
auto buildTextureKey = [&](size_t imgIndex, bool srgb) -> TextureCache::TextureKey
{
TextureCache::TextureKey key{};
@@ -279,10 +282,16 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
std::visit(fastgltf::visitor{
[&](fastgltf::sources::URI &filePath)
{
- const std::string path(filePath.uri.path().begin(), filePath.uri.path().end());
+ const std::string rel(filePath.uri.path().begin(), filePath.uri.path().end());
+ // Build an absolute (or at least baseDir-resolved) path for IO and stable keying.
+ std::filesystem::path resolved = std::filesystem::path(rel);
+ if (resolved.is_relative())
+ {
+ resolved = baseDir / resolved;
+ }
key.kind = TextureCache::TextureKey::SourceKind::FilePath;
- key.path = path;
- std::string id = std::string("GLTF:") + path + (srgb ? "#sRGB" : "#UNORM");
+ key.path = resolved.string();
+ std::string id = std::string("GLTF:") + key.path + (srgb ? "#sRGB" : "#UNORM");
key.hash = texcache::fnv1a64(id);
},
[&](fastgltf::sources::Vector &vector)
@@ -577,6 +586,19 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
}
newmesh->meshBuffers = engine->_resourceManager->uploadMesh(indices, vertices);
// If CPU vectors ballooned for this mesh, release capacity back to the OS
auto shrink_if_huge = [](auto &vec, size_t elemSizeBytes) {
const size_t capBytes = vec.capacity() * elemSizeBytes;
const size_t kThreshold = 64ull * 1024ull * 1024ull; // 64 MiB
if (capBytes > kThreshold)
{
using Vec = std::remove_reference_t<decltype(vec)>;
Vec empty;
vec.swap(empty);
}
};
shrink_if_huge(indices, sizeof(uint32_t));
shrink_if_huge(vertices, sizeof(Vertex));
if (engine->_rayManager)
{
engine->_rayManager->getOrBuildBLAS(newmesh);
@@ -646,6 +668,25 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
node->refreshTransform(glm::mat4{1.f});
}
}
// We no longer need glTF-owned buffer payloads; free any large vectors
for (auto &buf : gltf.buffers)
{
std::visit(fastgltf::visitor{
[](auto &arg) {},
[&](fastgltf::sources::Vector &vec) {
std::vector<uint8_t>().swap(vec.bytes);
}
}, buf.data);
}
for (auto &img : gltf.images)
{
std::visit(fastgltf::visitor{
[](auto &arg) {},
[&](fastgltf::sources::Vector &vec) {
std::vector<uint8_t>().swap(vec.bytes);
}
}, img.data);
}
return scene;
//< load_graph
}