// QuaternionEngine/src/core/texture_cache.cpp
#include "texture_cache.h"
#include <core/engine_context.h>
#include <core/vk_resource.h>
#include <core/vk_descriptors.h>
#include <core/config.h>
#include <algorithm>
#include "stb_image.h"
#include <ktx.h>
#include <ktxvulkan.h>
#include <algorithm>
#include "vk_device.h"
#include <cstring>
#include <filesystem>
#include <fstream>
#include <limits>
#include <cmath>
void TextureCache::init(EngineContext *ctx)
{
_context = ctx;
_running = true;
unsigned int threads = std::max(1u, std::min(4u, std::thread::hardware_concurrency()));
_decodeThreads.reserve(threads);
for (unsigned int i = 0; i < threads; ++i)
{
_decodeThreads.emplace_back([this]() { worker_loop(); });
}
}
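// Note: std::thread::hardware_concurrency() may legally return 0 when the
// value is not computable; the max(1, min(4, n)) clamp above guarantees at
// least one decode worker and caps the pool at four.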
void TextureCache::cleanup()
{
// Stop worker threads first.
if (_running.exchange(false))
{
{
// Empty critical section: taking _qMutex once after flipping _running
// guarantees that any worker which already saw _running as true is blocked
// in wait() before notify_all fires below, so no wakeup is missed.
std::lock_guard<std::mutex> lk(_qMutex);
}
_qCV.notify_all();
for (auto &t : _decodeThreads) if (t.joinable()) t.join();
_decodeThreads.clear();
}
if (!_context || !_context->getResources()) return;
auto *rm = _context->getResources();
for (TextureHandle h = 0; h < _entries.size(); ++h)
{
auto &e = _entries[h];
if (e.state == EntryState::Resident && e.image.image)
{
fmt::println("[TextureCache] cleanup destroy handle={} path='{}' bytes={}",
h,
e.path.empty() ? "<bytes>" : e.path,
e.sizeBytes);
rm->destroy_image(e.image);
e.image = {};
}
e.state = EntryState::Evicted;
}
_residentBytes = 0;
_lookup.clear();
_setToHandles.clear();
}
TextureCache::TextureHandle TextureCache::request(const TextureKey &key, VkSampler sampler)
{
// Ensure we have a valid, stable hash for deduplication.
TextureKey normKey = key;
if (normKey.hash == 0)
{
if (normKey.kind == TextureKey::SourceKind::FilePath)
{
std::string id = std::string("PATH:") + normKey.path + (normKey.srgb ? "#sRGB" : "#UNORM");
normKey.hash = texcache::fnv1a64(id);
}
else if (!normKey.bytes.empty())
{
uint64_t h = texcache::fnv1a64(normKey.bytes.data(), normKey.bytes.size());
normKey.hash = h ^ (normKey.srgb ? 0x9E3779B97F4A7C15ull : 0ull);
}
}
auto it = _lookup.find(normKey.hash);
if (it != _lookup.end())
{
TextureHandle h = it->second;
// Keep most recent sampler for future patches if provided
if (h < _entries.size() && sampler != VK_NULL_HANDLE)
{
_entries[h].sampler = sampler;
}
return h;
}
TextureHandle h = static_cast<TextureHandle>(_entries.size());
_lookup.emplace(normKey.hash, h);
Entry e{};
e.key = normKey;
e.sampler = sampler;
e.state = EntryState::Unloaded;
if (normKey.kind == TextureKey::SourceKind::FilePath)
{
e.path = normKey.path;
}
else
{
e.bytes = normKey.bytes;
_cpuSourceBytes += e.bytes.size();
}
fmt::println("[TextureCache] request handle={} kind={} path='{}' srgb={} mipmapped={} hash=0x{:016x}",
h,
(normKey.kind == TextureKey::SourceKind::FilePath ? "FilePath" : "Bytes"),
normKey.kind == TextureKey::SourceKind::FilePath ? normKey.path : "<bytes>",
normKey.srgb,
normKey.mipmapped,
normKey.hash);
_entries.push_back(std::move(e));
return h;
}
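// Example (hypothetical call site; the key fields are the ones used in this
// file): repeated requests for the same normalized key hash return the same
// handle, so loaders can call request() unconditionally.
//
//   TextureCache::TextureKey key{};
//   key.kind = TextureCache::TextureKey::SourceKind::FilePath;
//   key.path = "assets/albedo.png";
//   key.srgb = true;        // color data -> sRGB format variant
//   key.mipmapped = true;
//   auto a = cache.request(key, sampler);
//   auto b = cache.request(key, sampler); // a == b, deduplicated via fnv1a64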
void TextureCache::watchBinding(TextureHandle handle, VkDescriptorSet set, uint32_t binding,
VkSampler sampler, VkImageView fallbackView)
{
if (handle == InvalidHandle) return;
if (handle >= _entries.size()) return;
Entry &e = _entries[handle];
// Track patch
Patch p{};
p.set = set;
p.binding = binding;
p.sampler = sampler ? sampler : e.sampler;
p.fallbackView = fallbackView;
e.patches.push_back(p);
// Back-reference for fast per-set markUsed. Skip null sets; their patches
// are ignored at patch time anyway.
if (set != VK_NULL_HANDLE) _setToHandles[set].push_back(handle);
// If the texture is already resident, immediately patch the new descriptor
// so re-spawned models using cached textures get the correct bindings.
if (e.state == EntryState::Resident && e.image.imageView != VK_NULL_HANDLE && set != VK_NULL_HANDLE)
{
if (!_context || !_context->getDevice()) return;
DescriptorWriter writer;
writer.write_image(static_cast<int>(binding), e.image.imageView,
p.sampler ? p.sampler : e.sampler,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
writer.update_set(_context->getDevice()->device(), set);
}
}
void TextureCache::unwatchSet(VkDescriptorSet set)
{
if (set == VK_NULL_HANDLE) return;
auto it = _setToHandles.find(set);
if (it == _setToHandles.end()) return;
const auto &handles = it->second;
for (TextureHandle h : handles)
{
if (h >= _entries.size()) continue;
auto &patches = _entries[h].patches;
patches.erase(std::remove_if(patches.begin(), patches.end(),
[&](const Patch &p){ return p.set == set; }),
patches.end());
}
_setToHandles.erase(it);
}
void TextureCache::markUsed(TextureHandle handle, uint32_t frameIndex)
{
if (handle == InvalidHandle) return;
if (handle >= _entries.size()) return;
_entries[handle].lastUsedFrame = frameIndex;
}
void TextureCache::markSetUsed(VkDescriptorSet set, uint32_t frameIndex)
{
auto it = _setToHandles.find(set);
if (it == _setToHandles.end()) return;
for (TextureHandle h : it->second)
{
if (h < _entries.size())
{
_entries[h].lastUsedFrame = frameIndex;
}
}
}
static inline size_t bytes_per_texel(VkFormat fmt)
{
switch (fmt)
{
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8_SRGB:
return 1;
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R8G8_SRGB:
return 2;
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_R8G8B8A8_SRGB:
case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_B8G8R8A8_SRGB:
return 4;
default:
// Conservative fallback; block-compressed sizes are taken from the KTX2
// level table and never routed through this helper.
return 4;
}
}
static inline VkFormat to_srgb_variant(VkFormat fmt)
{
switch (fmt)
{
case VK_FORMAT_BC1_RGB_UNORM_BLOCK: return VK_FORMAT_BC1_RGB_SRGB_BLOCK;
case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: return VK_FORMAT_BC1_RGBA_SRGB_BLOCK;
case VK_FORMAT_BC2_UNORM_BLOCK: return VK_FORMAT_BC2_SRGB_BLOCK;
case VK_FORMAT_BC3_UNORM_BLOCK: return VK_FORMAT_BC3_SRGB_BLOCK;
case VK_FORMAT_BC7_UNORM_BLOCK: return VK_FORMAT_BC7_SRGB_BLOCK;
case VK_FORMAT_R8G8B8A8_UNORM: return VK_FORMAT_R8G8B8A8_SRGB;
case VK_FORMAT_B8G8R8A8_UNORM: return VK_FORMAT_B8G8R8A8_SRGB;
case VK_FORMAT_R8_UNORM: return VK_FORMAT_R8_SRGB;
case VK_FORMAT_R8G8_UNORM: return VK_FORMAT_R8G8_SRGB;
default: return fmt;
}
}
static inline VkFormat to_unorm_variant(VkFormat fmt)
{
switch (fmt)
{
case VK_FORMAT_BC1_RGB_SRGB_BLOCK: return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
case VK_FORMAT_BC2_SRGB_BLOCK: return VK_FORMAT_BC2_UNORM_BLOCK;
case VK_FORMAT_BC3_SRGB_BLOCK: return VK_FORMAT_BC3_UNORM_BLOCK;
case VK_FORMAT_BC7_SRGB_BLOCK: return VK_FORMAT_BC7_UNORM_BLOCK;
case VK_FORMAT_R8G8B8A8_SRGB: return VK_FORMAT_R8G8B8A8_UNORM;
case VK_FORMAT_B8G8R8A8_SRGB: return VK_FORMAT_B8G8R8A8_UNORM;
case VK_FORMAT_R8_SRGB: return VK_FORMAT_R8_UNORM;
case VK_FORMAT_R8G8_SRGB: return VK_FORMAT_R8G8_UNORM;
default: return fmt;
}
}
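// These two helpers let the request win over the container: e.g. a texture
// stored as BC7_SRGB in its KTX2 file but requested with srgb=false is
// reinterpreted as BC7_UNORM in drain_ready_uploads() below, since the
// caller, not the file, decides color-vs-data semantics.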
static inline float mip_factor_for_levels(uint32_t levels)
{
if (levels <= 1) return 1.0f;
// Sum of geometric series for area across mips (base * (1 + 1/4 + ...))
// factor = (1 - 4^{-L}) / (1 - 1/4) = 4/3 * (1 - 4^{-L})
float L = static_cast<float>(levels);
return 1.3333333f * (1.0f - std::pow(0.25f, L));
}
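// Worked example: levels=1 -> 1.0; levels=2 -> 4/3 * (1 - 1/16) = 1.25;
// levels=4 -> 4/3 * (1 - 4^-4) ~= 1.3281; as levels -> infinity the factor
// approaches 4/3, the usual "a full mip chain costs about one third extra
// memory" rule of thumb.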
static inline VkFormat choose_format(TextureCache::TextureKey::ChannelsHint hint, bool srgb)
{
using CH = TextureCache::TextureKey::ChannelsHint;
switch (hint)
{
case CH::R: return srgb ? VK_FORMAT_R8_SRGB : VK_FORMAT_R8_UNORM;
case CH::RG: return srgb ? VK_FORMAT_R8G8_SRGB : VK_FORMAT_R8G8_UNORM;
case CH::RGBA:
case CH::Auto:
default: return srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
}
}
// Nearest-neighbor downscale-by-2 helper (returns a newly allocated buffer)
static std::vector<uint8_t> downscale_half(const unsigned char* src, int w, int h, int comps)
{
int nw = std::max(1, w / 2);
int nh = std::max(1, h / 2);
std::vector<uint8_t> out(static_cast<size_t>(nw) * nh * comps);
for (int y = 0; y < nh; ++y)
{
for (int x = 0; x < nw; ++x)
{
int sx = std::min(w - 1, x * 2);
int sy = std::min(h - 1, y * 2);
const unsigned char* sp = src + (static_cast<size_t>(sy) * w + sx) * comps;
unsigned char* dp = out.data() + (static_cast<size_t>(y) * nw + x) * comps;
std::memcpy(dp, sp, comps);
}
}
return out;
}
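// A 2x2 box-filter variant of the helper above (a sketch, not wired into the
// cache): averaging the four source texels avoids the shimmering that plain
// nearest-neighbor picking can introduce on high-frequency content, at a
// small extra cost per pass.
[[maybe_unused]] static std::vector<uint8_t> downscale_half_box(const unsigned char* src, int w, int h, int comps)
{
    int nw = std::max(1, w / 2);
    int nh = std::max(1, h / 2);
    std::vector<uint8_t> out(static_cast<size_t>(nw) * nh * comps);
    for (int y = 0; y < nh; ++y)
    {
        // Clamp source rows at the border so odd dimensions stay in bounds.
        int sy0 = std::min(h - 1, y * 2);
        int sy1 = std::min(h - 1, y * 2 + 1);
        for (int x = 0; x < nw; ++x)
        {
            int sx0 = std::min(w - 1, x * 2);
            int sx1 = std::min(w - 1, x * 2 + 1);
            for (int c = 0; c < comps; ++c)
            {
                // Round-to-nearest average of the 2x2 block for this channel.
                unsigned sum = src[(static_cast<size_t>(sy0) * w + sx0) * comps + c]
                             + src[(static_cast<size_t>(sy0) * w + sx1) * comps + c]
                             + src[(static_cast<size_t>(sy1) * w + sx0) * comps + c]
                             + src[(static_cast<size_t>(sy1) * w + sx1) * comps + c];
                out[(static_cast<size_t>(y) * nw + x) * comps + c] = static_cast<uint8_t>((sum + 2) / 4);
            }
        }
    }
    return out;
}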
void TextureCache::start_load(Entry &e, ResourceManager &rm)
{
// Legacy entry point retained for API compatibility; it now just forwards to
// the async decode queue rather than loading synchronously, and pumpLoads no
// longer calls it.
(void)rm; // parameter kept for signature compatibility
enqueue_decode(e);
}
void TextureCache::patch_ready_entry(const Entry &e)
{
if (!_context || !_context->getDevice()) return;
if (e.state != EntryState::Resident) return;
DescriptorWriter writer;
for (const Patch &p : e.patches)
{
if (p.set == VK_NULL_HANDLE) continue;
writer.clear();
writer.write_image(static_cast<int>(p.binding), e.image.imageView,
p.sampler ? p.sampler : e.sampler,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
writer.update_set(_context->getDevice()->device(), p.set);
}
}
void TextureCache::patch_to_fallback(const Entry &e)
{
if (!_context || !_context->getDevice()) return;
DescriptorWriter writer;
for (const Patch &p : e.patches)
{
if (p.set == VK_NULL_HANDLE || p.fallbackView == VK_NULL_HANDLE) continue;
writer.clear();
writer.write_image(static_cast<int>(p.binding), p.fallbackView,
p.sampler ? p.sampler : e.sampler,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
writer.update_set(_context->getDevice()->device(), p.set);
}
}
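// Both patchers rewrite COMBINED_IMAGE_SAMPLER bindings in place. This
// assumes the engine guarantees the sets are not being consumed by in-flight
// command buffers at patch time (e.g. per-frame sets or update-after-bind);
// updating a descriptor set that a pending command buffer uses is otherwise
// invalid in Vulkan.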
void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &)
{
// Simple throttle to avoid massive spikes.
int started = 0;
const uint32_t now = _context ? _context->frameIndex : 0u;
// First, drain decoded results with a byte budget.
size_t admitted = drain_ready_uploads(rm, _maxBytesPerPump);
// If we exhausted the budget, avoid scheduling more decodes this frame.
bool budgetRemaining = (admitted < _maxBytesPerPump);
for (auto &e : _entries)
{
// Allow both Unloaded and Evicted entries to start work if seen again.
if (e.state == EntryState::Unloaded || e.state == EntryState::Evicted)
{
// Visibility-driven residency: only start uploads for textures
// that were marked used recently (current or previous frame).
// This avoids uploading assets that are not visible.
bool recentlyUsed = true;
if (_context)
{
// Schedule when first seen (previous frame) or if seen again.
recentlyUsed = (now == 0u) || (now - e.lastUsedFrame <= 1u);
}
// Gate reload attempts to avoid rapid oscillation right after eviction.
bool cooldownPassed = (now >= e.nextAttemptFrame);
if (recentlyUsed && cooldownPassed && budgetRemaining)
{
enqueue_decode(e);
if (++started >= _maxLoadsPerPump) break;
}
}
}
// Drain any remaining decoded results if we still have headroom.
if (budgetRemaining)
{
drain_ready_uploads(rm, _maxBytesPerPump - admitted);
}
// Optionally trim retained compressed sources to CPU budget.
evictCpuToBudget();
}
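// Typical per-frame driving order (a sketch; the real call sites live in the
// renderer, which is not shown here):
//
//   cache.markSetUsed(materialSet, frameIndex); // while recording draws
//   cache.pumpLoads(resources, frame);          // decode/upload under budgets
//   cache.evictToBudget(gpuBudgetBytes);        // enforce the VRAM ceiling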
void TextureCache::evictToBudget(size_t budgetBytes)
{
if (_residentBytes <= budgetBytes) return;
// Gather candidates
std::vector<std::pair<TextureHandle, uint32_t>> order;
order.reserve(_entries.size());
for (TextureHandle h = 0; h < _entries.size(); ++h)
{
const auto &e = _entries[h];
if (e.state == EntryState::Resident)
{
order.emplace_back(h, e.lastUsedFrame);
}
}
std::sort(order.begin(), order.end(), [](auto &a, auto &b) { return a.second < b.second; });
const uint32_t now = _context ? _context->frameIndex : 0u;
for (auto &pair : order)
{
if (_residentBytes <= budgetBytes) break;
TextureHandle h = pair.first;
Entry &e = _entries[h];
if (e.state != EntryState::Resident) continue;
// Prefer not to evict textures used this frame unless strictly necessary.
if (e.lastUsedFrame == now) continue;
// Rewrite watchers back to fallback before destroying
patch_to_fallback(e);
fmt::println("[TextureCache] evictToBudget destroy handle={} path='{}' bytes={} residentBytesBefore={}",
h,
e.path.empty() ? "<bytes>" : e.path,
e.sizeBytes,
_residentBytes);
_context->getResources()->destroy_image(e.image);
e.image = {};
e.state = EntryState::Evicted;
e.lastEvictedFrame = now;
e.nextAttemptFrame = std::max(e.nextAttemptFrame, now + _reloadCooldownFrames);
if (_residentBytes >= e.sizeBytes) _residentBytes -= e.sizeBytes; else _residentBytes = 0;
}
}
void TextureCache::enqueue_decode(Entry &e)
{
if (e.state != EntryState::Unloaded && e.state != EntryState::Evicted) return;
e.state = EntryState::Loading;
DecodeRequest rq{};
// Derive the handle from the entry's position; e always aliases an element
// of _entries, so the pointer difference is a valid index.
rq.handle = static_cast<TextureHandle>(&e - _entries.data());
rq.key = e.key;
if (e.key.kind == TextureKey::SourceKind::FilePath) rq.path = e.path; else rq.bytes = e.bytes;
{
std::lock_guard<std::mutex> lk(_qMutex);
_queue.push_back(std::move(rq));
}
_qCV.notify_one();
}
void TextureCache::worker_loop()
{
while (_running)
{
DecodeRequest rq{};
{
std::unique_lock<std::mutex> lk(_qMutex);
_qCV.wait(lk, [this]{ return !_running || !_queue.empty(); });
if (!_running) break;
rq = std::move(_queue.front());
_queue.pop_front();
}
DecodedResult out{};
out.handle = rq.handle;
out.mipmapped = rq.key.mipmapped;
out.srgb = rq.key.srgb;
out.channels = rq.key.channels;
out.mipClampLevels = rq.key.mipClampLevels;
// 1) Prefer KTX2 when source is a file path and a .ktx2 version exists
if (rq.key.kind == TextureKey::SourceKind::FilePath)
{
std::filesystem::path p = rq.path;
std::filesystem::path ktxPath;
if (p.extension() == ".ktx2")
{
ktxPath = p;
}
else
{
ktxPath = p;
ktxPath.replace_extension(".ktx2");
}
std::error_code ec;
bool hasKTX2 = (!ktxPath.empty() && std::filesystem::exists(ktxPath, ec) && !ec);
if (hasKTX2)
{
ktxTexture2* ktex = nullptr;
ktxResult kres = ktxTexture2_CreateFromNamedFile(ktxPath.string().c_str(), KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, &ktex);
if (kres != KTX_SUCCESS || !ktex)
{
fmt::println("[TextureCache] libktx open failed for '{}': {}", ktxPath.string(), ktxErrorString(kres));
}
else
{
if (ktxTexture2_NeedsTranscoding(ktex))
{
ktx_transcode_fmt_e target = (rq.key.channels == TextureKey::ChannelsHint::RG) ? KTX_TTF_BC5_RG : KTX_TTF_BC7_RGBA;
kres = ktxTexture2_TranscodeBasis(ktex, target, 0);
if (kres != KTX_SUCCESS)
{
fmt::println("[TextureCache] libktx transcode failed for '{}': {}", ktxPath.string(), ktxErrorString(kres));
ktxTexture_Destroy(ktxTexture(ktex));
ktex = nullptr;
}
}
if (ktex)
{
VkFormat vkfmt = static_cast<VkFormat>(ktex->vkFormat);
uint32_t mipLevels = ktex->numLevels;
uint32_t baseW = ktex->baseWidth;
uint32_t baseH = ktex->baseHeight;
ktx_size_t totalSize = ktxTexture_GetDataSize(ktxTexture(ktex));
const uint8_t* dataPtr = reinterpret_cast<const uint8_t*>(ktxTexture_GetData(ktxTexture(ktex)));
switch (vkfmt)
{
case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
case VK_FORMAT_BC2_UNORM_BLOCK:
case VK_FORMAT_BC2_SRGB_BLOCK:
case VK_FORMAT_BC3_UNORM_BLOCK:
case VK_FORMAT_BC3_SRGB_BLOCK:
case VK_FORMAT_BC4_UNORM_BLOCK:
case VK_FORMAT_BC4_SNORM_BLOCK:
case VK_FORMAT_BC5_UNORM_BLOCK:
case VK_FORMAT_BC5_SNORM_BLOCK:
case VK_FORMAT_BC6H_UFLOAT_BLOCK:
case VK_FORMAT_BC6H_SFLOAT_BLOCK:
case VK_FORMAT_BC7_UNORM_BLOCK:
case VK_FORMAT_BC7_SRGB_BLOCK:
break;
default:
fmt::println("[TextureCache] libktx returned non-BC format {} — skipping KTX2", string_VkFormat(vkfmt));
ktxTexture_Destroy(ktxTexture(ktex));
ktex = nullptr;
break;
}
if (ktex)
{
out.isKTX2 = true;
out.ktxFormat = vkfmt;
out.ktxMipLevels = mipLevels;
out.ktx.bytes.assign(dataPtr, dataPtr + totalSize);
out.ktx.levels.clear();
out.ktx.levels.reserve(mipLevels);
for (uint32_t mip = 0; mip < mipLevels; ++mip)
{
ktx_size_t off = 0, len = 0;
ktxTexture_GetImageOffset(ktxTexture(ktex), mip, 0, 0, &off);
ktxTexture_GetImageSize(ktxTexture(ktex), mip, &len);
uint32_t w = std::max(1u, baseW >> mip);
uint32_t h = std::max(1u, baseH >> mip);
out.ktx.levels.push_back({ static_cast<uint64_t>(off), static_cast<uint64_t>(len), w, h });
}
out.width = static_cast<int>(baseW);
out.height = static_cast<int>(baseH);
ktxTexture_Destroy(ktxTexture(ktex));
}
}
}
}
else if (p.extension() == ".ktx2")
{
fmt::println("[TextureCache] Requested .ktx2 '{}' but file not found (ec={})", p.string(), ec.value());
}
}
// 2) Raster fallback via stb_image if not KTX2 or unsupported
if (!out.isKTX2)
{
int w = 0, h = 0, comp = 0;
unsigned char *data = nullptr;
if (rq.key.kind == TextureKey::SourceKind::FilePath)
{
data = stbi_load(rq.path.c_str(), &w, &h, &comp, 4);
}
else if (!rq.bytes.empty())
{
data = stbi_load_from_memory(rq.bytes.data(), static_cast<int>(rq.bytes.size()), &w, &h, &comp, 4);
}
out.width = w;
out.height = h;
if (data && w > 0 && h > 0)
{
// Progressive downscale if requested
if (_maxUploadDimension > 0 && (w > static_cast<int>(_maxUploadDimension) || h > static_cast<int>(_maxUploadDimension)))
{
std::vector<uint8_t> scaled;
scaled.assign(data, data + static_cast<size_t>(w) * h * 4);
int cw = w, ch = h;
while (cw > static_cast<int>(_maxUploadDimension) || ch > static_cast<int>(_maxUploadDimension))
{
auto tmp = downscale_half(scaled.data(), cw, ch, 4);
scaled.swap(tmp);
cw = std::max(1, cw / 2);
ch = std::max(1, ch / 2);
}
stbi_image_free(data);
out.rgba = std::move(scaled);
out.width = cw;
out.height = ch;
}
else
{
out.heap = data;
out.heapBytes = static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
}
}
else if (data)
{
stbi_image_free(data);
}
}
{
std::lock_guard<std::mutex> lk(_readyMutex);
_ready.push_back(std::move(out));
}
}
}
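// Pipeline shape: request()/pumpLoads() push DecodeRequests into _queue under
// _qMutex; the worker pool decodes them (libktx first, stb_image as fallback)
// off the render thread; finished DecodedResults land in _ready under
// _readyMutex and are consumed on the render thread by drain_ready_uploads()
// below, which applies the per-pump byte budget.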
size_t TextureCache::drain_ready_uploads(ResourceManager &rm, size_t budgetBytes)
{
std::deque<DecodedResult> local;
{
std::lock_guard<std::mutex> lk(_readyMutex);
if (_ready.empty()) return 0;
local.swap(_ready);
}
size_t admitted = 0;
for (auto &res : local)
{
if (res.handle == InvalidHandle || res.handle >= _entries.size()) continue;
Entry &e = _entries[res.handle];
if (!res.isKTX2 && ((res.heap == nullptr && res.rgba.empty()) || res.width <= 0 || res.height <= 0))
{
if (res.heap) { stbi_image_free(res.heap); res.heap = nullptr; } // don't leak a partial decode
e.state = EntryState::Evicted; // failed decode; keep fallback bound
continue;
}
const uint32_t now = _context ? _context->frameIndex : 0u;
VkExtent3D extent{static_cast<uint32_t>(std::max(0, res.width)), static_cast<uint32_t>(std::max(0, res.height)), 1u};
// Both branches of the old ternary yielded e.key.channels; use it directly
// (Auto means "keep the decoded RGBA8").
TextureKey::ChannelsHint hint = e.key.channels;
size_t expectedBytes = 0;
VkFormat fmt = VK_FORMAT_UNDEFINED;
uint32_t desiredLevels = 1;
if (res.isKTX2)
{
fmt = res.ktxFormat;
// Nudge format to sRGB/UNORM variant based on request to avoid gamma mistakes
VkFormat reqFmt = e.key.srgb ? to_srgb_variant(fmt) : to_unorm_variant(fmt);
if (reqFmt != fmt)
{
fmt = reqFmt;
fmt::println("[TextureCache] Overriding KTX2 format to {} based on request (original {})",
string_VkFormat(fmt), string_VkFormat(res.ktxFormat));
}
desiredLevels = res.ktxMipLevels;
for (const auto &lv : res.ktx.levels) expectedBytes += static_cast<size_t>(lv.length);
}
else
{
fmt = choose_format(hint, res.srgb);
if (res.mipmapped)
{
if (res.mipClampLevels > 0) desiredLevels = res.mipClampLevels;
else desiredLevels = static_cast<uint32_t>(std::floor(std::log2(std::max(extent.width, extent.height)))) + 1u;
}
const float mipFactor = res.mipmapped ? mip_factor_for_levels(desiredLevels) : 1.0f;
expectedBytes = static_cast<size_t>(static_cast<double>(extent.width) * extent.height * bytes_per_texel(fmt) * mipFactor);
}
// Byte budget for this pump (frame)
if (admitted + expectedBytes > budgetBytes)
{
// push back to be retried next frame/pump
std::lock_guard<std::mutex> lk(_readyMutex);
_ready.push_front(std::move(res));
continue;
}
if (_gpuBudgetBytes != std::numeric_limits<size_t>::max())
{
if (_residentBytes + expectedBytes > _gpuBudgetBytes)
{
size_t need = (_residentBytes + expectedBytes) - _gpuBudgetBytes;
(void)try_make_space(need, now);
}
if (_residentBytes + expectedBytes > _gpuBudgetBytes)
{
// Not enough space even after eviction → back off; free decode heap
if (res.heap) { stbi_image_free(res.heap); res.heap = nullptr; }
e.state = EntryState::Evicted;
e.lastEvictedFrame = now;
e.nextAttemptFrame = std::max(e.nextAttemptFrame, now + _reloadCooldownFrames);
continue;
}
}
if (res.isKTX2)
{
// Basic format support check: ensure the GPU can sample this format
bool supported = true;
if (_context && _context->getDevice())
{
VkFormatProperties props{};
vkGetPhysicalDeviceFormatProperties(_context->getDevice()->physicalDevice(), fmt, &props);
supported = (props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) != 0;
}
if (!supported)
{
VkFormatProperties props{};
if (_context && _context->getDevice())
{
vkGetPhysicalDeviceFormatProperties(_context->getDevice()->physicalDevice(), fmt, &props);
}
fmt::println("[TextureCache] Compressed format unsupported: format={} (optimalFeatures=0x{:08x}) — fallback raster for {}",
string_VkFormat(fmt), props.optimalTilingFeatures, e.path);
// Fall back to raster path: requeue by synthesizing a non-KTX result
// Attempt synchronous fallback decode from file if available.
int fw = 0, fh = 0, comp = 0;
unsigned char *fdata = nullptr;
if (e.key.kind == TextureKey::SourceKind::FilePath)
{
fdata = stbi_load(e.path.c_str(), &fw, &fh, &comp, 4);
}
if (!fdata)
{
e.state = EntryState::Evicted;
continue;
}
VkExtent3D fext{ (uint32_t)fw, (uint32_t)fh, 1 };
VkFormat ffmt = choose_format(hint, res.srgb);
uint32_t mips = (res.mipmapped) ? static_cast<uint32_t>(std::floor(std::log2(std::max(fext.width, fext.height)))) + 1u : 1u;
e.image = rm.create_image(fdata, fext, ffmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped, mips);
stbi_image_free(fdata);
e.sizeBytes = static_cast<size_t>(static_cast<double>(fext.width) * fext.height * bytes_per_texel(ffmt) * (res.mipmapped ? mip_factor_for_levels(mips) : 1.0f));
expectedBytes = e.sizeBytes; // keep the admitted-bytes accounting in sync with the raster upload
}
else
{
// Prepare level table for ResourceManager
std::vector<ResourceManager::MipLevelCopy> levels;
levels.reserve(res.ktx.levels.size());
for (const auto &lv : res.ktx.levels)
{
levels.push_back(ResourceManager::MipLevelCopy{ lv.offset, lv.length, lv.width, lv.height });
}
fmt::println("[TextureCache] upload KTX2 handle={} fmt={} levels={} size={}x{} srgb={} path='{}'",
res.handle,
string_VkFormat(fmt),
res.ktxMipLevels,
extent.width,
extent.height,
res.srgb,
e.path);
e.image = rm.create_image_compressed(res.ktx.bytes.data(), res.ktx.bytes.size(), fmt, levels);
e.sizeBytes = expectedBytes;
}
}
else
{
// Optionally repack channels to R or RG to save memory
std::vector<uint8_t> packed;
const void *src = nullptr;
if (hint == TextureKey::ChannelsHint::R)
{
packed.resize(static_cast<size_t>(extent.width) * extent.height);
const uint8_t* in = res.heap ? res.heap : res.rgba.data();
for (size_t i = 0, px = static_cast<size_t>(extent.width) * extent.height; i < px; ++i)
{
packed[i] = in[i * 4 + 0];
}
src = packed.data();
}
else if (hint == TextureKey::ChannelsHint::RG)
{
packed.resize(static_cast<size_t>(extent.width) * extent.height * 2);
const uint8_t* in = res.heap ? res.heap : res.rgba.data();
for (size_t i = 0, px = static_cast<size_t>(extent.width) * extent.height; i < px; ++i)
{
packed[i * 2 + 0] = in[i * 4 + 0];
packed[i * 2 + 1] = in[i * 4 + 1];
}
src = packed.data();
}
else
{
src = res.heap ? static_cast<const void *>(res.heap)
: static_cast<const void *>(res.rgba.data());
}
uint32_t mipOverride = (res.mipmapped ? desiredLevels : 1);
fmt::println("[TextureCache] upload raster handle={} fmt={} levels={} size={}x{} srgb={} path='{}'",
res.handle,
string_VkFormat(fmt),
mipOverride,
extent.width,
extent.height,
res.srgb,
e.path);
e.image = rm.create_image(src, extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped, mipOverride);
e.sizeBytes = expectedBytes;
}
if (vmaDebugEnabled())
{
std::string name = e.key.kind == TextureKey::SourceKind::FilePath ? e.path : std::string("tex.bytes");
vmaSetAllocationName(_context->getDevice()->allocator(), e.image.allocation, name.c_str());
}
_residentBytes += e.sizeBytes;
e.state = EntryState::Resident;
e.nextAttemptFrame = 0; // clear backoff after success
// Drop source bytes if policy says so (only for Bytes-backed keys).
if (!_keepSourceBytes && e.key.kind == TextureKey::SourceKind::Bytes)
{
drop_source_bytes(e);
}
// Free temporary decode heap if present
if (res.heap)
{
stbi_image_free(res.heap);
}
// Patch descriptors now; data becomes valid before sampling due to RG upload pass
patch_ready_entry(e);
admitted += expectedBytes;
}
return admitted;
}
void TextureCache::drop_source_bytes(Entry &e)
{
if (e.bytes.empty()) return;
if (e.key.kind != TextureKey::SourceKind::Bytes) return;
if (_cpuSourceBytes >= e.bytes.size()) _cpuSourceBytes -= e.bytes.size();
e.bytes.clear();
e.bytes.shrink_to_fit();
e.path.clear();
}
void TextureCache::evictCpuToBudget()
{
if (_cpuSourceBytes <= _cpuSourceBudget) return;
// Collect candidates: Resident entries with retained bytes
std::vector<TextureHandle> cands;
cands.reserve(_entries.size());
for (TextureHandle h = 0; h < _entries.size(); ++h)
{
const Entry &e = _entries[h];
if (e.state == EntryState::Resident && !e.bytes.empty() && e.key.kind == TextureKey::SourceKind::Bytes)
{
cands.push_back(h);
}
}
// LRU-ish: sort by lastUsed ascending
std::sort(cands.begin(), cands.end(), [&](TextureHandle a, TextureHandle b){
return _entries[a].lastUsedFrame < _entries[b].lastUsedFrame;
});
for (TextureHandle h : cands)
{
if (_cpuSourceBytes <= _cpuSourceBudget) break;
drop_source_bytes(_entries[h]);
}
}
bool TextureCache::try_make_space(size_t bytesNeeded, uint32_t now)
{
if (bytesNeeded == 0) return true;
if (_residentBytes == 0) return false;
// Collect candidates that were not used this frame, oldest first
std::vector<std::pair<TextureHandle, uint32_t>> order;
order.reserve(_entries.size());
for (TextureHandle h = 0; h < _entries.size(); ++h)
{
const auto &e = _entries[h];
if (e.state == EntryState::Resident && e.lastUsedFrame != now)
{
order.emplace_back(h, e.lastUsedFrame);
}
}
std::sort(order.begin(), order.end(), [](auto &a, auto &b) { return a.second < b.second; });
size_t freed = 0;
for (auto &pair : order)
{
if (freed >= bytesNeeded) break;
TextureHandle h = pair.first;
Entry &e = _entries[h];
if (e.state != EntryState::Resident) continue;
patch_to_fallback(e);
fmt::println("[TextureCache] try_make_space destroy handle={} path='{}' bytes={} residentBytesBefore={}",
h,
e.path.empty() ? "<bytes>" : e.path,
e.sizeBytes,
_residentBytes);
_context->getResources()->destroy_image(e.image);
e.image = {};
e.state = EntryState::Evicted;
e.lastEvictedFrame = now;
e.nextAttemptFrame = std::max(e.nextAttemptFrame, now + _reloadCooldownFrames);
if (_residentBytes >= e.sizeBytes) _residentBytes -= e.sizeBytes; else _residentBytes = 0;
freed += e.sizeBytes;
}
return freed >= bytesNeeded;
}
void TextureCache::debug_snapshot(std::vector<DebugRow> &outRows, DebugStats &outStats) const
{
outRows.clear();
outStats = DebugStats{};
outStats.residentBytes = _residentBytes;
for (const auto &e : _entries)
{
switch (e.state)
{
case EntryState::Resident: outStats.countResident++; break;
case EntryState::Evicted: outStats.countEvicted++; break;
case EntryState::Unloaded: outStats.countUnloaded++; break;
case EntryState::Loading: /* ignore */ break;
}
DebugRow row{};
if (e.key.kind == TextureKey::SourceKind::FilePath)
{
row.name = e.path.empty() ? std::string("<path>") : e.path;
}
else
{
row.name = std::string("<bytes> (") + std::to_string(e.bytes.size()) + ")";
}
if (e.state == EntryState::Resident && e.image.image)
{
row.name += std::string(" [") + string_VkFormat(e.image.imageFormat) + "]";
}
row.bytes = e.sizeBytes;
row.lastUsed = e.lastUsedFrame;
row.state = static_cast<uint8_t>(e.state);
outRows.push_back(std::move(row));
}
std::sort(outRows.begin(), outRows.end(), [](const DebugRow &a, const DebugRow &b) {
return a.bytes > b.bytes;
});
}
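// Example consumer (hypothetical debug-UI code; ui.row is illustrative):
//
//   std::vector<TextureCache::DebugRow> rows;
//   TextureCache::DebugStats stats{};
//   cache.debug_snapshot(rows, stats);
//   // rows arrive sorted by GPU bytes, largest first
//   for (const auto &r : rows)
//       ui.row(r.name, r.bytes, r.lastUsed, r.state);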