ADD: texture prefetch

2025-11-06 14:36:03 +09:00
parent dad6db971b
commit 6be30e98b4
6 changed files with 291 additions and 12 deletions

View File

@@ -11,6 +11,9 @@
#include <stb_image.h>
#include "asset_locator.h"
#include <core/texture_cache.h>
#include <fastgltf/parser.hpp>
#include <fastgltf/util.hpp>
#include <fastgltf/tools.hpp>
using std::filesystem::path;
@@ -227,6 +230,116 @@ std::shared_ptr<MeshAsset> AssetManager::createMesh(const MeshCreateInfo &info)
return mesh;
}
size_t AssetManager::prefetchGLTFTextures(std::string_view nameOrPath)
{
if (!_engine || !_engine->_context || !_engine->_context->textures) return 0;
if (nameOrPath.empty()) return 0;
std::string resolved = assetPath(nameOrPath);
std::filesystem::path path = resolved;
fastgltf::Parser parser{};
constexpr auto gltfOptions = fastgltf::Options::DontRequireValidAssetMember | fastgltf::Options::AllowDouble |
fastgltf::Options::LoadGLBBuffers | fastgltf::Options::LoadExternalBuffers;
fastgltf::GltfDataBuffer data;
if (!data.loadFromFile(path)) return 0;
fastgltf::Asset gltf;
size_t scheduled = 0;
auto type = fastgltf::determineGltfFileType(&data);
if (type == fastgltf::GltfType::glTF)
{
auto load = parser.loadGLTF(&data, path.parent_path(), gltfOptions);
if (load) gltf = std::move(load.get()); else return 0;
}
else if (type == fastgltf::GltfType::GLB)
{
auto load = parser.loadBinaryGLTF(&data, path.parent_path(), gltfOptions);
if (load) gltf = std::move(load.get()); else return 0;
}
else
{
return 0;
}
TextureCache *cache = _engine->_context->textures;
auto enqueueTex = [&](size_t imgIndex, bool srgb)
{
if (imgIndex >= gltf.images.size()) return;
TextureCache::TextureKey key{};
key.srgb = srgb;
key.mipmapped = true;
fastgltf::Image &image = gltf.images[imgIndex];
std::visit(fastgltf::visitor{
[&](fastgltf::sources::URI &filePath)
{
const std::string rel(filePath.uri.path().begin(), filePath.uri.path().end());
// Resolve relative URIs against the glTF directory and build the same
// "GLTF:" id that loadGltf hashes, so the real load finds and reuses the
// entry this prefetch created instead of decoding the image a second time.
std::filesystem::path full = rel;
if (full.is_relative()) full = path.parent_path() / full;
key.kind = TextureCache::TextureKey::SourceKind::FilePath;
key.path = full.string();
std::string id = std::string("GLTF:") + key.path + (srgb ? "#sRGB" : "#UNORM");
key.hash = texcache::fnv1a64(id);
},
[&](fastgltf::sources::Vector &vector)
{
key.kind = TextureCache::TextureKey::SourceKind::Bytes;
key.bytes.assign(vector.bytes.begin(), vector.bytes.end());
uint64_t h = texcache::fnv1a64(key.bytes.data(), key.bytes.size());
key.hash = h ^ (srgb ? 0x9E3779B97F4A7C15ull : 0ull);
},
[&](fastgltf::sources::BufferView &view)
{
auto &bufferView = gltf.bufferViews[view.bufferViewIndex];
auto &buffer = gltf.buffers[bufferView.bufferIndex];
std::visit(fastgltf::visitor{
[](auto &arg) {},
[&](fastgltf::sources::Vector &vec)
{
size_t off = bufferView.byteOffset;
size_t len = bufferView.byteLength;
key.kind = TextureCache::TextureKey::SourceKind::Bytes;
key.bytes.assign(vec.bytes.begin() + off, vec.bytes.begin() + off + len);
uint64_t h = texcache::fnv1a64(key.bytes.data(), key.bytes.size());
key.hash = h ^ (srgb ? 0x9E3779B97F4A7C15ull : 0ull);
}
}, buffer.data);
},
[](auto &other) {}
}, image.data);
if (key.hash != 0)
{
VkSampler samp = _engine->_samplerManager->defaultLinear();
cache->request(key, samp);
scheduled++;
}
};
for (const auto &tex : gltf.textures)
{
if (tex.imageIndex.has_value())
{
// Prefetch as sRGB (the baseColor convention); a later request with the same key reuses this entry.
enqueueTex(tex.imageIndex.value(), true);
}
}
// Proactively free big buffer vectors we no longer need.
for (auto &buf : gltf.buffers)
{
std::visit(fastgltf::visitor{
[](auto &arg) {},
[&](fastgltf::sources::Vector &vec) {
std::vector<uint8_t>().swap(vec.bytes);
}
}, buf.data);
}
return scheduled;
}
static Bounds compute_bounds(std::span<Vertex> vertices)
{
Bounds b{};

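Both key-building sites above deduplicate purely through key.hash, so they must hash byte-identical id strings. For reference, a minimal sketch of the 64-bit FNV-1a loop these ids are assumed to go through; texcache::fnv1a64 itself is defined outside this diff, and the path below is illustrative:

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <string_view>

// Minimal FNV-1a 64 sketch, assumed to match texcache::fnv1a64.
// Uses the standard 64-bit offset basis and prime.
constexpr uint64_t fnv1a64(std::string_view s)
{
    uint64_t h = 14695981039346656037ull; // FNV-1a 64-bit offset basis
    for (unsigned char c : s)
    {
        h ^= c;                // xor the byte in first...
        h *= 1099511628211ull; // ...then multiply by the FNV prime
    }
    return h;
}

int main()
{
    // The '#sRGB' / '#UNORM' suffix keeps color and data maps as distinct cache keys.
    std::printf("%016" PRIx64 "\n", fnv1a64("GLTF:assets/wood.png#sRGB"));
    std::printf("%016" PRIx64 "\n", fnv1a64("GLTF:assets/wood.png#UNORM"));
}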
View File

@@ -76,6 +76,13 @@ public:
std::optional<std::shared_ptr<LoadedGLTF> > loadGLTF(std::string_view nameOrPath);
// Queue texture loads for a glTF file ahead of time. This parses the glTF,
// builds TextureCache keys for referenced images (both external URIs and
// embedded images in buffers), and issues TextureCache::request() calls.
// Actual uploads happen via the normal per-frame pump.
// Returns number of textures scheduled.
size_t prefetchGLTFTextures(std::string_view nameOrPath);
std::shared_ptr<MeshAsset> createMesh(const MeshCreateInfo &info);
std::shared_ptr<MeshAsset> getPrimitive(std::string_view name) const;
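For context, a hypothetical call site for this API; the engine->_assetManager wiring and the scene path are illustrative assumptions, not names taken from this commit:

// Hypothetical usage sketch -- 'engine->_assetManager' and the path are
// placeholders; only prefetchGLTFTextures()/loadGLTF() come from this commit.
size_t scheduled = engine->_assetManager->prefetchGLTFTextures("scenes/city.glb");
std::printf("prefetch scheduled %zu texture(s)\n", scheduled);
// ...some frames later, after the per-frame pump has decoded and uploaded,
// the real load finds the cache entries already warm:
auto scene = engine->_assetManager->loadGLTF("scenes/city.glb");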

View File

@@ -94,6 +94,7 @@ TextureCache::TextureHandle TextureCache::request(const TextureKey &key, VkSampl
else
{
e.bytes = normKey.bytes;
_cpuSourceBytes += e.bytes.size();
}
_entries.push_back(std::move(e));
return h;
@@ -185,7 +186,6 @@ void TextureCache::patch_to_fallback(const Entry &e)
void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &)
{
// Simple throttle to avoid massive spikes.
- const int kMaxLoadsPerPump = 4;
int started = 0;
const uint32_t now = _context ? _context->frameIndex : 0u;
for (auto &e : _entries)
@@ -204,13 +204,16 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &)
if (recentlyUsed)
{
enqueue_decode(e);
- if (++started >= kMaxLoadsPerPump) break;
+ if (++started >= _maxLoadsPerPump) break;
}
}
}
// Drain decoded results and enqueue GPU uploads.
drain_ready_uploads(rm);
// Optionally trim retained compressed sources to CPU budget.
evictCpuToBudget();
}
void TextureCache::evictToBudget(size_t budgetBytes)
@@ -298,11 +301,13 @@ void TextureCache::worker_loop()
out.srgb = rq.key.srgb;
if (data && w > 0 && h > 0)
{
- size_t sz = static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
- out.rgba.resize(sz);
- memcpy(out.rgba.data(), data, sz);
+ out.heap = data;
+ out.heapBytes = static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
}
+ else if (data)
+ {
+ stbi_image_free(data);
+ }
- if (data) stbi_image_free(data);
{
std::lock_guard<std::mutex> lk(_readyMutex);
@@ -324,7 +329,7 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
{
if (res.handle == InvalidHandle || res.handle >= _entries.size()) continue;
Entry &e = _entries[res.handle];
- if (res.rgba.empty() || res.width <= 0 || res.height <= 0)
+ if ((res.heap == nullptr && res.rgba.empty()) || res.width <= 0 || res.height <= 0)
{
e.state = EntryState::Evicted; // failed decode; keep fallback
continue;
@@ -332,8 +337,16 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
VkExtent3D extent{static_cast<uint32_t>(res.width), static_cast<uint32_t>(res.height), 1u};
VkFormat fmt = res.srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
- e.image = rm.create_image(static_cast<void *>(res.rgba.data()), extent, fmt,
- VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped);
+ const void *src = res.heap ? static_cast<const void *>(res.heap)
+ : static_cast<const void *>(res.rgba.data());
+ e.image = rm.create_image(src, extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped);
if (vmaDebugEnabled())
{
@@ -346,11 +359,58 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
_residentBytes += e.sizeBytes;
e.state = EntryState::Resident;
// Drop source bytes if policy says so (only for Bytes-backed keys).
if (!_keepSourceBytes && e.key.kind == TextureKey::SourceKind::Bytes)
{
drop_source_bytes(e);
}
// Free temporary decode heap if present
if (res.heap)
{
stbi_image_free(res.heap);
}
// Patch descriptors now; the data becomes valid before sampling via the render-graph upload pass
patch_ready_entry(e);
}
}
void TextureCache::drop_source_bytes(Entry &e)
{
if (e.bytes.empty()) return;
if (e.key.kind != TextureKey::SourceKind::Bytes) return;
// Clamp instead of skipping the subtraction so a bookkeeping mismatch
// can never leave _cpuSourceBytes permanently inflated.
_cpuSourceBytes -= std::min(_cpuSourceBytes, e.bytes.size());
e.bytes.clear();
e.bytes.shrink_to_fit();
e.path.clear();
}
void TextureCache::evictCpuToBudget()
{
if (_cpuSourceBytes <= _cpuSourceBudget) return;
// Collect candidates: Resident entries with retained bytes
std::vector<TextureHandle> cands;
cands.reserve(_entries.size());
for (TextureHandle h = 0; h < _entries.size(); ++h)
{
const Entry &e = _entries[h];
if (e.state == EntryState::Resident && !e.bytes.empty() && e.key.kind == TextureKey::SourceKind::Bytes)
{
cands.push_back(h);
}
}
// LRU-ish: sort by lastUsed ascending
std::sort(cands.begin(), cands.end(), [&](TextureHandle a, TextureHandle b){
return _entries[a].lastUsedFrame < _entries[b].lastUsedFrame;
});
for (TextureHandle h : cands)
{
if (_cpuSourceBytes <= _cpuSourceBudget) break;
drop_source_bytes(_entries[h]);
}
}
void TextureCache::debug_snapshot(std::vector<DebugRow> &outRows, DebugStats &outStats) const
{
outRows.clear();

View File

@@ -76,6 +76,25 @@ public:
};
void debug_snapshot(std::vector<DebugRow>& outRows, DebugStats& outStats) const;
size_t resident_bytes() const { return _residentBytes; }
// CPU-side source bytes currently retained (compressed image payloads kept
// for potential re-decode). Only applies to entries created with Bytes keys.
size_t cpu_source_bytes() const { return _cpuSourceBytes; }
// Runtime controls
void set_max_loads_per_pump(int n) { _maxLoadsPerPump = (n > 0) ? n : 1; }
int max_loads_per_pump() const { return _maxLoadsPerPump; }
// If false (default), compressed source bytes are dropped once an image is
// uploaded to the GPU and descriptors patched. Set true to retain sources
// for potential re-decode after eviction.
void set_keep_source_bytes(bool keep) { _keepSourceBytes = keep; }
bool keep_source_bytes() const { return _keepSourceBytes; }
// Set a soft CPU budget (in bytes) for retained compressed sources. After
// each upload drain, the cache will try to free source bytes for Resident
// entries until under budget.
void set_cpu_source_budget(size_t bytes) { _cpuSourceBudget = bytes; }
size_t cpu_source_budget() const { return _cpuSourceBudget; }
private:
struct Patch
@@ -108,6 +127,12 @@ private:
std::unordered_map<uint64_t, TextureHandle> _lookup; // key.hash -> handle
std::unordered_map<VkDescriptorSet, std::vector<TextureHandle>> _setToHandles;
size_t _residentBytes{0};
size_t _cpuSourceBytes{0};
// Controls
int _maxLoadsPerPump{4};
bool _keepSourceBytes{false};
size_t _cpuSourceBudget{64ull * 1024ull * 1024ull}; // 64 MiB default
void start_load(Entry &e, ResourceManager &rm);
void patch_ready_entry(const Entry &e);
@@ -126,6 +151,11 @@ private:
TextureHandle handle{InvalidHandle};
int width{0};
int height{0};
// Prefer heap pointer from stb to avoid an extra memcpy into a vector.
// If 'heap' is non-null, it must be freed with stbi_image_free() after
// the upload has copied the data. 'rgba' remains as a fallback path.
unsigned char *heap{nullptr};
size_t heapBytes{0};
std::vector<uint8_t> rgba;
bool mipmapped{true};
bool srgb{false};
@@ -134,6 +164,8 @@ private:
void worker_loop();
void enqueue_decode(Entry &e);
void drain_ready_uploads(ResourceManager &rm);
void drop_source_bytes(Entry &e);
void evictCpuToBudget();
std::vector<std::thread> _decodeThreads;
std::mutex _qMutex;
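Taken together, the new knobs allow per-platform tuning. A sketch for a memory-constrained target follows; the values are examples only (init() later in this commit sets its own defaults), and the accessor is the one the debug UI already uses:

// Illustrative tuning for a low-memory target; values are examples only.
TextureCache &tc = *engine->_textureCache;
tc.set_max_loads_per_pump(2);                        // kick at most 2 decodes per pump
tc.set_keep_source_bytes(true);                      // retain compressed sources for re-decode
tc.set_cpu_source_budget(16ull * 1024ull * 1024ull); // ...but trim retained sources to 16 MiB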

View File

@@ -174,15 +174,37 @@ namespace {
const size_t texBudget = query_texture_budget_bytes(dev);
const size_t resBytes = eng->_textureCache->resident_bytes();
const size_t cpuSrcBytes = eng->_textureCache->cpu_source_bytes();
ImGui::Text("Device local: %.1f / %.1f MiB", (double)devLocalUsage/1048576.0, (double)devLocalBudget/1048576.0);
ImGui::Text("Texture budget: %.1f MiB", (double)texBudget/1048576.0);
ImGui::Text("Resident textures: %.1f MiB", (double)resBytes/1048576.0);
ImGui::Text("CPU source bytes: %.1f MiB", (double)cpuSrcBytes/1048576.0);
ImGui::SameLine();
if (ImGui::Button("Trim To Budget Now"))
{
eng->_textureCache->evictToBudget(texBudget);
}
// Controls
int loadsPerPump = eng->_textureCache->max_loads_per_pump();
if (ImGui::SliderInt("Loads/Frame", &loadsPerPump, 1, 16))
{
eng->_textureCache->set_max_loads_per_pump(loadsPerPump);
}
bool keepSources = eng->_textureCache->keep_source_bytes();
if (ImGui::Checkbox("Keep Source Bytes", &keepSources))
{
eng->_textureCache->set_keep_source_bytes(keepSources);
}
int cpuBudgetMiB = (int)(eng->_textureCache->cpu_source_budget() / 1048576ull);
if (ImGui::SliderInt("CPU Source Budget (MiB)", &cpuBudgetMiB, 0, 2048))
{
eng->_textureCache->set_cpu_source_budget((size_t)cpuBudgetMiB * 1048576ull);
}
TextureCache::DebugStats stats{};
std::vector<TextureCache::DebugRow> rows;
eng->_textureCache->debug_snapshot(rows, stats);
@@ -535,6 +557,10 @@ void VulkanEngine::init()
_textureCache = std::make_unique<TextureCache>();
_textureCache->init(_context.get());
_context->textures = _textureCache.get();
// Conservative defaults to avoid CPU spikes during heavy glTF loads.
_textureCache->set_max_loads_per_pump(3);
_textureCache->set_keep_source_bytes(false);
_textureCache->set_cpu_source_budget(32ull * 1024ull * 1024ull); // 32 MiB
// Optional ray tracing manager if supported and extensions enabled
if (_deviceManager->supportsRayQuery() && _deviceManager->supportsAccelerationStructure())

View File

@@ -265,6 +265,9 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
//< load_arrays
// Note: glTF images are now loaded on-demand via TextureCache.
// Resolve external image paths relative to the source glTF file directory
// to avoid failing to find textures when running from a different CWD.
const std::filesystem::path baseDir = path.parent_path();
auto buildTextureKey = [&](size_t imgIndex, bool srgb) -> TextureCache::TextureKey
{
TextureCache::TextureKey key{};
@@ -279,10 +282,16 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
std::visit(fastgltf::visitor{
[&](fastgltf::sources::URI &filePath)
{
- const std::string path(filePath.uri.path().begin(), filePath.uri.path().end());
+ const std::string rel(filePath.uri.path().begin(), filePath.uri.path().end());
+ // Build an absolute (or at least baseDir-resolved) path for IO and stable keying.
+ std::filesystem::path resolved = std::filesystem::path(rel);
+ if (resolved.is_relative())
+ {
+ resolved = baseDir / resolved;
+ }
key.kind = TextureCache::TextureKey::SourceKind::FilePath;
- key.path = path;
- std::string id = std::string("GLTF:") + path + (srgb ? "#sRGB" : "#UNORM");
+ key.path = resolved.string();
+ std::string id = std::string("GLTF:") + key.path + (srgb ? "#sRGB" : "#UNORM");
key.hash = texcache::fnv1a64(id);
},
[&](fastgltf::sources::Vector &vector)
@@ -577,6 +586,19 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
}
newmesh->meshBuffers = engine->_resourceManager->uploadMesh(indices, vertices);
// If CPU vectors ballooned for this mesh, release capacity back to the OS
auto shrink_if_huge = [](auto &vec, size_t elemSizeBytes) {
const size_t capBytes = vec.capacity() * elemSizeBytes;
const size_t kThreshold = 64ull * 1024ull * 1024ull; // 64 MiB
if (capBytes > kThreshold)
{
using Vec = std::remove_reference_t<decltype(vec)>;
Vec empty;
vec.swap(empty);
}
};
shrink_if_huge(indices, sizeof(uint32_t));
shrink_if_huge(vertices, sizeof(Vertex));
if (engine->_rayManager)
{
engine->_rayManager->getOrBuildBLAS(newmesh);
@@ -646,6 +668,25 @@ std::optional<std::shared_ptr<LoadedGLTF> > loadGltf(VulkanEngine *engine, std::
node->refreshTransform(glm::mat4{1.f});
}
}
// We no longer need glTF-owned buffer payloads; free any large vectors
for (auto &buf : gltf.buffers)
{
std::visit(fastgltf::visitor{
[](auto &arg) {},
[&](fastgltf::sources::Vector &vec) {
std::vector<uint8_t>().swap(vec.bytes);
}
}, buf.data);
}
for (auto &img : gltf.images)
{
std::visit(fastgltf::visitor{
[](auto &arg) {},
[&](fastgltf::sources::Vector &vec) {
std::vector<uint8_t>().swap(vec.bytes);
}
}, img.data);
}
return scene;
//< load_graph
}