ADD: texture throttle, downscale, RGB

This commit is contained in:
2025-11-09 00:11:17 +09:00
parent b8f23b27c2
commit 9a11cacd1a
8 changed files with 382 additions and 24 deletions

View File

@@ -10,6 +10,7 @@
#include "vk_device.h"
#include <cstring>
#include <limits>
#include <cmath>
void TextureCache::init(EngineContext *ctx)
{
@@ -157,9 +158,66 @@ void TextureCache::markSetUsed(VkDescriptorSet set, uint32_t frameIndex)
}
}
static inline size_t estimate_rgba8_bytes(uint32_t w, uint32_t h)
static inline size_t bytes_per_texel(VkFormat fmt)
{
return static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
switch (fmt)
{
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8_SRGB:
return 1;
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R8G8_SRGB:
return 2;
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_R8G8B8A8_SRGB:
case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_B8G8R8A8_SRGB:
return 4;
default:
return 4;
}
}
// Ratio between the texel count of a mip chain with `levels` levels and the
// texel count of its base level, assuming every level has a quarter of the
// texels of the previous one:
//   1 + 1/4 + ... + (1/4)^(levels - 1) = 4/3 * (1 - 4^-levels)
// A chain of zero or one level adds nothing beyond the base, so the ratio is 1.
static inline float mip_factor_for_levels(uint32_t levels)
{
    const bool hasMipTail = (levels > 1);
    if (!hasMipTail)
    {
        return 1.0f;
    }

    const float exponent = static_cast<float>(levels);
    const float missingTail = std::pow(0.25f, exponent); // 4^-levels
    return 1.3333333f * (1.0f - missingTail);
}
// Maps a channel hint and colour space to an 8-bit-per-channel Vulkan format.
// R and RG select the one- and two-channel formats; every other hint
// (RGBA, Auto, or an unexpected value) selects the four-channel RGBA format.
// `srgb` picks the _SRGB variant of the format instead of the _UNORM one.
static inline VkFormat choose_format(TextureCache::TextureKey::ChannelsHint hint, bool srgb)
{
    using Hint = TextureCache::TextureKey::ChannelsHint;

    if (hint == Hint::R)
    {
        return srgb ? VK_FORMAT_R8_SRGB : VK_FORMAT_R8_UNORM;
    }
    if (hint == Hint::RG)
    {
        return srgb ? VK_FORMAT_R8G8_SRGB : VK_FORMAT_R8G8_UNORM;
    }
    return srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
}
// Halves an interleaved 8-bit image in both dimensions with nearest-neighbour
// sampling: each output pixel is a copy of the top-left pixel of the
// corresponding 2x2 source footprint.
//
//   src   - source pixels, read as w * h * comps bytes, row-major with no
//           padding between rows.
//   w, h  - source dimensions in pixels.
//   comps - bytes per pixel.
//
// Returns a newly allocated buffer of max(1, w/2) * max(1, h/2) * comps bytes;
// the source is not modified, and a dimension of 1 stays 1. Returns an empty
// buffer when src is null or any of w, h, comps is not positive, instead of
// reading outside the source.
static std::vector<uint8_t> downscale_half(const unsigned char* src, int w, int h, int comps)
{
    if (src == nullptr || w <= 0 || h <= 0 || comps <= 0)
    {
        return {};
    }

    const int nw = std::max(1, w / 2);
    const int nh = std::max(1, h / 2);
    const size_t pixelBytes = static_cast<size_t>(comps);
    const size_t srcRowBytes = static_cast<size_t>(w) * pixelBytes;
    const size_t dstRowBytes = static_cast<size_t>(nw) * pixelBytes;

    std::vector<uint8_t> out(dstRowBytes * static_cast<size_t>(nh));
    for (int y = 0; y < nh; ++y)
    {
        // Source row 2*y and column 2*x are always in range: for a dimension
        // d >= 2 the largest index is 2*(d/2 - 1) < d, and for d == 1 it is 0.
        const unsigned char* srcRow = src + static_cast<size_t>(y) * 2u * srcRowBytes;
        unsigned char* dstRow = out.data() + static_cast<size_t>(y) * dstRowBytes;
        for (int x = 0; x < nw; ++x)
        {
            const size_t column = static_cast<size_t>(x);
            std::memcpy(dstRow + column * pixelBytes, srcRow + column * 2u * pixelBytes, pixelBytes);
        }
    }
    return out;
}
void TextureCache::start_load(Entry &e, ResourceManager &rm)
@@ -207,6 +265,12 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &)
// Simple throttle to avoid massive spikes.
int started = 0;
const uint32_t now = _context ? _context->frameIndex : 0u;
// First, drain decoded results with a byte budget.
size_t admitted = drain_ready_uploads(rm, _maxBytesPerPump);
// If we exhausted the budget, avoid scheduling more decodes this frame.
bool budgetRemaining = (admitted < _maxBytesPerPump);
for (auto &e : _entries)
{
// Allow both Unloaded and Evicted entries to start work if seen again.
@@ -223,7 +287,7 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &)
}
// Gate reload attempts to avoid rapid oscillation right after eviction.
bool cooldownPassed = (now >= e.nextAttemptFrame);
if (recentlyUsed && cooldownPassed)
if (recentlyUsed && cooldownPassed && budgetRemaining)
{
enqueue_decode(e);
if (++started >= _maxLoadsPerPump) break;
@@ -231,8 +295,11 @@ void TextureCache::pumpLoads(ResourceManager &rm, FrameResources &)
}
}
// Drain decoded results and enqueue GPU uploads.
drain_ready_uploads(rm);
// Drain any remaining decoded results if we still have headroom.
if (budgetRemaining)
{
drain_ready_uploads(rm, _maxBytesPerPump - admitted);
}
// Optionally trim retained compressed sources to CPU budget.
evictCpuToBudget();
@@ -326,10 +393,33 @@ void TextureCache::worker_loop()
out.height = h;
out.mipmapped = rq.key.mipmapped;
out.srgb = rq.key.srgb;
out.channels = rq.key.channels;
out.mipClampLevels = rq.key.mipClampLevels;
if (data && w > 0 && h > 0)
{
out.heap = data;
out.heapBytes = static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
// Progressive downscale if requested
if (_maxUploadDimension > 0 && (w > static_cast<int>(_maxUploadDimension) || h > static_cast<int>(_maxUploadDimension)))
{
std::vector<uint8_t> scaled;
scaled.assign(data, data + static_cast<size_t>(w) * h * 4);
int cw = w, ch = h;
while (cw > static_cast<int>(_maxUploadDimension) || ch > static_cast<int>(_maxUploadDimension))
{
auto tmp = downscale_half(scaled.data(), cw, ch, 4);
scaled.swap(tmp);
cw = std::max(1, cw / 2);
ch = std::max(1, ch / 2);
}
stbi_image_free(data);
out.rgba = std::move(scaled);
out.width = cw;
out.height = ch;
}
else
{
out.heap = data;
out.heapBytes = static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
}
}
else if (data)
{
@@ -343,15 +433,16 @@ void TextureCache::worker_loop()
}
}
void TextureCache::drain_ready_uploads(ResourceManager &rm)
size_t TextureCache::drain_ready_uploads(ResourceManager &rm, size_t budgetBytes)
{
std::deque<DecodedResult> local;
{
std::lock_guard<std::mutex> lk(_readyMutex);
if (_ready.empty()) return;
if (_ready.empty()) return 0;
local.swap(_ready);
}
size_t admitted = 0;
for (auto &res : local)
{
if (res.handle == InvalidHandle || res.handle >= _entries.size()) continue;
@@ -364,11 +455,35 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
const uint32_t now = _context ? _context->frameIndex : 0u;
VkExtent3D extent{static_cast<uint32_t>(res.width), static_cast<uint32_t>(res.height), 1u};
VkFormat fmt = res.srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
TextureKey::ChannelsHint hint = (e.key.channels == TextureKey::ChannelsHint::Auto)
? TextureKey::ChannelsHint::Auto
: e.key.channels;
VkFormat fmt = choose_format(hint, res.srgb);
// Estimate resident size for admission control (match post-upload computation)
const float mipFactor = res.mipmapped ? 1.3333333f : 1.0f;
const size_t expectedBytes = static_cast<size_t>(estimate_rgba8_bytes(extent.width, extent.height) * mipFactor);
uint32_t desiredLevels = 1;
if (res.mipmapped)
{
if (res.mipClampLevels > 0)
{
desiredLevels = res.mipClampLevels;
}
else
{
desiredLevels = static_cast<uint32_t>(std::floor(std::log2(std::max(extent.width, extent.height)))) + 1u;
}
}
const float mipFactor = res.mipmapped ? mip_factor_for_levels(desiredLevels) : 1.0f;
const size_t expectedBytes = static_cast<size_t>(extent.width) * extent.height * bytes_per_texel(fmt) * mipFactor;
// Byte budget for this pump (frame)
if (admitted + expectedBytes > budgetBytes)
{
// push back to be retried next frame/pump
std::lock_guard<std::mutex> lk(_readyMutex);
_ready.push_front(std::move(res));
continue;
}
if (_gpuBudgetBytes != std::numeric_limits<size_t>::max())
{
@@ -388,9 +503,38 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
}
}
const void *src = res.heap ? static_cast<const void *>(res.heap)
: static_cast<const void *>(res.rgba.data());
e.image = rm.create_image(src, extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped);
// Optionally repack channels to R or RG to save memory
std::vector<uint8_t> packed;
const void *src = nullptr;
if (hint == TextureKey::ChannelsHint::R)
{
packed.resize(static_cast<size_t>(extent.width) * extent.height);
const uint8_t* in = res.heap ? res.heap : res.rgba.data();
for (size_t i = 0, px = static_cast<size_t>(extent.width) * extent.height; i < px; ++i)
{
packed[i] = in[i * 4 + 0];
}
src = packed.data();
}
else if (hint == TextureKey::ChannelsHint::RG)
{
packed.resize(static_cast<size_t>(extent.width) * extent.height * 2);
const uint8_t* in = res.heap ? res.heap : res.rgba.data();
for (size_t i = 0, px = static_cast<size_t>(extent.width) * extent.height; i < px; ++i)
{
packed[i * 2 + 0] = in[i * 4 + 0];
packed[i * 2 + 1] = in[i * 4 + 1];
}
src = packed.data();
}
else
{
src = res.heap ? static_cast<const void *>(res.heap)
: static_cast<const void *>(res.rgba.data());
}
uint32_t mipOverride = (res.mipmapped ? desiredLevels : 1);
e.image = rm.create_image(src, extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped, mipOverride);
if (vmaDebugEnabled())
{
@@ -417,7 +561,9 @@ void TextureCache::drain_ready_uploads(ResourceManager &rm)
// Patch descriptors now; data becomes valid before sampling due to RG upload pass
patch_ready_entry(e);
admitted += expectedBytes;
}
return admitted;
}
void TextureCache::drop_source_bytes(Entry &e)

View File

@@ -28,11 +28,14 @@ public:
// Describes one texture request: where the encoded image comes from and how
// the uploaded GPU image should be configured.
struct TextureKey
{
// Selects which of `path` / `bytes` holds the image source.
enum class SourceKind : uint8_t { FilePath, Bytes };
// Requested channel count for the GPU image. R and RG select one- and
// two-channel 8-bit formats (the upload path copies the first one or two
// channels of each decoded RGBA pixel); Auto and RGBA both select RGBA8.
enum class ChannelsHint : uint8_t { Auto, R, RG, RGBA };
SourceKind kind{SourceKind::FilePath};
std::string path; // used when kind==FilePath
std::vector<uint8_t> bytes; // used when kind==Bytes
bool srgb{false}; // desired sampling format
bool mipmapped{true}; // generate full mip chain
ChannelsHint channels{ChannelsHint::Auto}; // prefer narrower formats when possible
// Only consulted when mipmapped is true.
uint32_t mipClampLevels{0}; // 0 = full chain, otherwise limit to N mips
uint64_t hash{0}; // stable dedup key
};
@@ -88,6 +91,14 @@ public:
// Runtime controls
// Sets how many decodes a single pump may start; non-positive values fall back to 1.
void set_max_loads_per_pump(int n)
{
    if (n > 0)
    {
        _maxLoadsPerPump = n;
    }
    else
    {
        _maxLoadsPerPump = 1;
    }
}
int max_loads_per_pump() const { return _maxLoadsPerPump; }
// Limit total bytes admitted for uploads per pump (frame).
void set_max_bytes_per_pump(size_t bytes) { _maxBytesPerPump = bytes; }
size_t max_bytes_per_pump() const { return _maxBytesPerPump; }
// Clamp decoded image dimensions before upload (progressive resolution).
// 0 disables clamping. When >0, images larger than this dimension on any axis
// are downscaled by powers of 2 on the decode thread until within limit.
void set_max_upload_dimension(uint32_t dim) { _maxUploadDimension = dim; }
uint32_t max_upload_dimension() const { return _maxUploadDimension; }
// If false (default), compressed source bytes are dropped once an image is
// uploaded to the GPU and descriptors patched. Set true to retain sources
@@ -147,6 +158,8 @@ private:
size_t _cpuSourceBudget{64ull * 1024ull * 1024ull}; // 64 MiB default
size_t _gpuBudgetBytes{std::numeric_limits<size_t>::max()}; // unlimited unless set
uint32_t _reloadCooldownFrames{2};
size_t _maxBytesPerPump{128ull * 1024ull * 1024ull}; // 128 MiB/frame upload budget
uint32_t _maxUploadDimension{4096}; // progressive downscale cap
void start_load(Entry &e, ResourceManager &rm);
void patch_ready_entry(const Entry &e);
@@ -173,11 +186,14 @@ private:
std::vector<uint8_t> rgba;
bool mipmapped{true};
bool srgb{false};
TextureKey::ChannelsHint channels{TextureKey::ChannelsHint::Auto};
uint32_t mipClampLevels{0};
};
void worker_loop();
void enqueue_decode(Entry &e);
void drain_ready_uploads(ResourceManager &rm);
// Returns total resident bytes admitted this pump (after GPU budget gate).
size_t drain_ready_uploads(ResourceManager &rm, size_t budgetBytes);
void drop_source_bytes(Entry &e);
void evictCpuToBudget();

View File

@@ -193,6 +193,12 @@ namespace {
{
eng->_textureCache->set_max_loads_per_pump(loadsPerPump);
}
static int uploadBudgetMiB = 128;
uploadBudgetMiB = (int)(eng->_textureCache->max_bytes_per_pump() / 1048576ull);
if (ImGui::SliderInt("Upload Budget (MiB)", &uploadBudgetMiB, 16, 2048))
{
eng->_textureCache->set_max_bytes_per_pump((size_t)uploadBudgetMiB * 1048576ull);
}
static bool keepSources = false;
keepSources = eng->_textureCache->keep_source_bytes();
if (ImGui::Checkbox("Keep Source Bytes", &keepSources))
@@ -205,6 +211,12 @@ namespace {
{
eng->_textureCache->set_cpu_source_budget((size_t)cpuBudgetMiB * 1048576ull);
}
static int maxUploadDim = 4096;
maxUploadDim = (int)eng->_textureCache->max_upload_dimension();
if (ImGui::SliderInt("Max Upload Dimension", &maxUploadDim, 0, 8192))
{
eng->_textureCache->set_max_upload_dimension((uint32_t)std::max(0, maxUploadDim));
}
TextureCache::DebugStats stats{};
std::vector<TextureCache::DebugRow> rows;
@@ -558,10 +570,12 @@ void VulkanEngine::init()
_textureCache = std::make_unique<TextureCache>();
_textureCache->init(_context.get());
_context->textures = _textureCache.get();
// Conservative defaults to avoid CPU spikes during heavy glTF loads.
// Conservative defaults to avoid CPU/RAM/VRAM spikes during heavy glTF loads.
_textureCache->set_max_loads_per_pump(3);
_textureCache->set_keep_source_bytes(false);
_textureCache->set_cpu_source_budget(64ull * 1024ull * 1024ull); // 64 MiB
_textureCache->set_max_bytes_per_pump(128ull * 1024ull * 1024ull); // 128 MiB/frame
_textureCache->set_max_upload_dimension(4096);
// Optional ray tracing manager if supported and extensions enabled
if (_deviceManager->supportsRayQuery() && _deviceManager->supportsAccelerationStructure())

View File

@@ -138,9 +138,14 @@ void vkutil::copy_image_to_image(VkCommandBuffer cmd, VkImage source, VkImage de
}
//< copyimg
//> mipgen
void vkutil::generate_mipmaps(VkCommandBuffer cmd, VkImage image, VkExtent2D imageSize)
// Number of levels in a full mip chain for `imageSize`, i.e.
// floor(log2(max(width, height))) + 1.
// Counted by integer halving rather than floating-point log2, so a zero extent
// yields 1 instead of converting -infinity to int.
static inline int compute_full_mip_count(VkExtent2D imageSize)
{
    int levels = 1;
    for (auto dim = std::max(imageSize.width, imageSize.height); dim > 1u; dim >>= 1u)
    {
        ++levels;
    }
    return levels;
}
void vkutil::generate_mipmaps_levels(VkCommandBuffer cmd, VkImage image, VkExtent2D imageSize, int mipLevels)
{
if (mipLevels <= 0) mipLevels = 1;
for (int mip = 0; mip < mipLevels; mip++) {
VkExtent2D halfSize = imageSize;
@@ -210,3 +215,8 @@ void vkutil::generate_mipmaps(VkCommandBuffer cmd, VkImage image, VkExtent2D ima
transition_image(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}
//< mipgen
// Generates the complete mip chain for `image`: the level count is derived
// from `imageSize` and the work is delegated to generate_mipmaps_levels().
void vkutil::generate_mipmaps(VkCommandBuffer cmd, VkImage image, VkExtent2D imageSize)
{
    const int fullChainLevels = compute_full_mip_count(imageSize);
    generate_mipmaps_levels(cmd, image, imageSize, fullChainLevels);
}

View File

@@ -7,4 +7,6 @@ namespace vkutil {
void copy_image_to_image(VkCommandBuffer cmd, VkImage source, VkImage destination, VkExtent2D srcSize, VkExtent2D dstSize);
void generate_mipmaps(VkCommandBuffer cmd, VkImage image, VkExtent2D imageSize);
// Variant that generates exactly mipLevels levels (starting at base level 0).
void generate_mipmaps_levels(VkCommandBuffer cmd, VkImage image, VkExtent2D imageSize, int mipLevels);
};

View File

@@ -134,6 +134,43 @@ AllocatedImage ResourceManager::create_image(VkExtent3D size, VkFormat format, V
return newImage;
}
// Creates a GPU-only image and a matching image view with an explicit mip
// level count.
//
// When `mipmapped` is false or `mipLevelsOverride` is 0, this defers to the
// four-argument create_image overload. Otherwise the image is created with
// exactly `mipLevelsOverride` levels and the view spans all of them. The view
// uses the depth aspect for VK_FORMAT_D32_SFLOAT and the colour aspect for
// every other format.
// NOTE(review): mipLevelsOverride is passed through unclamped; callers
// presumably must keep it within the full chain length for `size` - confirm
// against the VkImageCreateInfo::mipLevels limits.
AllocatedImage ResourceManager::create_image(VkExtent3D size, VkFormat format, VkImageUsageFlags usage,
                                             bool mipmapped, uint32_t mipLevelsOverride) const
{
    const bool useExplicitLevels = mipmapped && (mipLevelsOverride != 0);
    if (!useExplicitLevels)
    {
        return create_image(size, format, usage, mipmapped);
    }

    VkImageCreateInfo imageInfo = vkinit::image_create_info(format, usage, size);
    imageInfo.mipLevels = mipLevelsOverride;

    VmaAllocationCreateInfo allocationInfo = {};
    allocationInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
    allocationInfo.requiredFlags = static_cast<VkMemoryPropertyFlags>(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

    AllocatedImage result{};
    result.imageFormat = format;
    result.imageExtent = size;

    VK_CHECK(
        vmaCreateImage(_deviceManager->allocator(), &imageInfo, &allocationInfo, &result.image, &result.allocation,
                       nullptr));

    const VkImageAspectFlags aspect = (format == VK_FORMAT_D32_SFLOAT) ? VK_IMAGE_ASPECT_DEPTH_BIT
                                                                       : VK_IMAGE_ASPECT_COLOR_BIT;

    VkImageViewCreateInfo viewInfo = vkinit::imageview_create_info(format, result.image, aspect);
    viewInfo.subresourceRange.levelCount = imageInfo.mipLevels;

    VK_CHECK(vkCreateImageView(_deviceManager->device(), &viewInfo, nullptr, &result.imageView));

    return result;
}
// Returns byte size per texel for a subset of common formats.
static inline size_t bytes_per_texel(VkFormat fmt)
{
@@ -180,6 +217,46 @@ AllocatedImage ResourceManager::create_image(const void *data, VkExtent3D size,
pending.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
pending.finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
pending.generateMips = mipmapped;
pending.mipLevels = (mipmapped)
? static_cast<uint32_t>(std::floor(std::log2(std::max(size.width, size.height)))) + 1
: 1;
_pendingImageUploads.push_back(std::move(pending));
if (!_deferUploads)
{
process_queued_uploads_immediate();
}
return new_image;
}
AllocatedImage ResourceManager::create_image(const void *data, VkExtent3D size, VkFormat format,
VkImageUsageFlags usage,
bool mipmapped, uint32_t mipLevelsOverride)
{
size_t bpp = bytes_per_texel(format);
size_t data_size = static_cast<size_t>(size.depth) * size.width * size.height * bpp;
AllocatedBuffer uploadbuffer = create_buffer(data_size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VMA_MEMORY_USAGE_CPU_TO_GPU);
memcpy(uploadbuffer.info.pMappedData, data, data_size);
vmaFlushAllocation(_deviceManager->allocator(), uploadbuffer.allocation, 0, data_size);
AllocatedImage new_image = create_image(size, format,
usage | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
mipmapped, mipLevelsOverride);
PendingImageUpload pending{};
pending.staging = uploadbuffer;
pending.image = new_image.image;
pending.extent = size;
pending.format = format;
pending.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
pending.finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
pending.generateMips = mipmapped;
pending.mipLevels = (mipmapped && mipLevelsOverride > 0) ? mipLevelsOverride
: (mipmapped ? static_cast<uint32_t>(std::floor(std::log2(std::max(size.width, size.height)))) + 1 : 1);
_pendingImageUploads.push_back(std::move(pending));
@@ -333,8 +410,9 @@ void ResourceManager::process_queued_uploads_immediate()
if (imageUpload.generateMips)
{
vkutil::generate_mipmaps(cmd, imageUpload.image,
VkExtent2D{imageUpload.extent.width, imageUpload.extent.height});
vkutil::generate_mipmaps_levels(cmd, imageUpload.image,
VkExtent2D{imageUpload.extent.width, imageUpload.extent.height},
static_cast<int>(imageUpload.mipLevels));
}
else
{
@@ -507,10 +585,11 @@ void ResourceManager::register_upload_pass(RenderGraph &graph, FrameResources &f
if (upload.generateMips)
{
// NOTE: generate_mipmaps() transitions the image to
// NOTE: generate_mipmaps_levels() transitions the image to
// VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL at the end.
// Do not transition back to TRANSFER here. See docs/ResourceManager.md.
vkutil::generate_mipmaps(cmd, image, VkExtent2D{upload.extent.width, upload.extent.height});
// Do not transition back to TRANSFER here.
vkutil::generate_mipmaps_levels(cmd, image, VkExtent2D{upload.extent.width, upload.extent.height},
static_cast<int>(upload.mipLevels));
}
}
});

View File

@@ -36,6 +36,7 @@ public:
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkImageLayout finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
bool generateMips = false;
uint32_t mipLevels = 1;
};
void init(DeviceManager *deviceManager);
@@ -48,9 +49,15 @@ public:
AllocatedImage create_image(VkExtent3D size, VkFormat format, VkImageUsageFlags usage,
bool mipmapped = false) const;
// Variant with explicit mip level count (>=1). If 0, computes full chain when mipmapped.
AllocatedImage create_image(VkExtent3D size, VkFormat format, VkImageUsageFlags usage,
bool mipmapped, uint32_t mipLevelsOverride) const;
AllocatedImage create_image(const void *data, VkExtent3D size, VkFormat format, VkImageUsageFlags usage,
bool mipmapped = false);
// Variant with explicit mip level count used for generate_mipmaps.
AllocatedImage create_image(const void *data, VkExtent3D size, VkFormat format, VkImageUsageFlags usage,
bool mipmapped, uint32_t mipLevelsOverride);
void destroy_image(const AllocatedImage &img) const;