ADD: KTX loader

2025-11-10 17:24:27 +09:00
parent d97db7d801
commit 62092513e2
7 changed files with 367 additions and 112 deletions

View File

@@ -34,6 +34,8 @@ add_executable (vulkan_engine
core/frame_resources.cpp
core/texture_cache.h
core/texture_cache.cpp
core/ktx2_loader.h
core/ktx2_loader.cpp
core/config.h
core/vk_engine.h
core/vk_engine.cpp
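The ktx2_loader sources registered here are new in this commit, but their contents are not part of the hunks below. For orientation, a minimal sketch of the interface the texture cache calls (KTX2Image, parse_ktx2): field names are taken from the call sites in texture_cache.cpp and the byte offsets from the public KTX2 container layout, while the supercompression handling and error strings are assumptions rather than the committed implementation.

// Sketch of core/ktx2_loader.h (the real file is added by this commit but not shown).
// Assumption: levels are consumed as raw byte ranges and supercompressed files are rejected.
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>

struct KTX2Level { uint64_t offset{0}, length{0}; uint32_t width{0}, height{0}; };

struct KTX2Image
{
    VkFormat format{VK_FORMAT_UNDEFINED};
    uint32_t width{0}, height{0};
    uint32_t mipLevels{0}, faceCount{0}, layerCount{0};
    uint32_t supercompression{0};
    std::vector<KTX2Level> levels; // per-mip byte ranges into 'data'
    std::vector<uint8_t> data;     // full file payload, staged as-is later
};

inline bool parse_ktx2(const uint8_t *bytes, size_t size, KTX2Image &out, std::string *err)
{
    static const uint8_t kMagic[12] = {0xAB, 'K', 'T', 'X', ' ', '2', '0', 0xBB, '\r', '\n', 0x1A, '\n'};
    if (size < 80 || std::memcmp(bytes, kMagic, 12) != 0) { if (err) *err = "not a KTX2 file"; return false; }
    auto u32 = [&](size_t o) { uint32_t v; std::memcpy(&v, bytes + o, 4); return v; };
    auto u64 = [&](size_t o) { uint64_t v; std::memcpy(&v, bytes + o, 8); return v; };
    out.format           = static_cast<VkFormat>(u32(12));
    out.width            = u32(20);
    out.height           = u32(24);
    out.layerCount       = u32(32);
    out.faceCount        = u32(36);
    out.mipLevels        = std::max(1u, u32(40));
    out.supercompression = u32(44);
    if (out.supercompression != 0) { if (err) *err = "supercompression not supported"; return false; }
    // The 80-byte header is followed by the level index: 3 x uint64 per level
    // (byteOffset, byteLength, uncompressedByteLength).
    out.levels.resize(out.mipLevels);
    for (size_t i = 0, idx = 80; i < out.mipLevels; ++i, idx += 24)
    {
        if (idx + 24 > size) { if (err) *err = "truncated level index"; return false; }
        out.levels[i].offset = u64(idx);
        out.levels[i].length = u64(idx + 8);
        out.levels[i].width  = std::max(1u, out.width >> i);
        out.levels[i].height = std::max(1u, out.height >> i);
    }
    out.data.assign(bytes, bytes + size);
    return true;
}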

View File

@@ -6,9 +6,12 @@
#include <core/config.h>
#include <algorithm>
#include "stb_image.h"
#include "ktx2_loader.h"
#include "vk_device.h"
#include <cstring>
#include <filesystem>
#include <fstream>
#include <limits>
#include <cmath>
@@ -372,58 +375,131 @@ void TextureCache::worker_loop()
_queue.pop_front();
}
DecodedResult out{};
out.handle = rq.handle;
out.mipmapped = rq.key.mipmapped;
out.srgb = rq.key.srgb;
out.channels = rq.key.channels;
out.mipClampLevels = rq.key.mipClampLevels;
// 1) Prefer KTX2 when source is a file path and a .ktx2 version exists
bool attemptedKTX2 = false;
if (rq.key.kind == TextureKey::SourceKind::FilePath)
{
std::filesystem::path p = rq.path;
std::filesystem::path ktxPath;
if (p.extension() == ".ktx2")
{
ktxPath = p;
}
else
{
ktxPath = p;
ktxPath.replace_extension(".ktx2");
}
std::error_code ec;
bool hasKTX2 = (!ktxPath.empty() && std::filesystem::exists(ktxPath, ec) && !ec);
if (hasKTX2)
{
attemptedKTX2 = true;
// Read file
fmt::println("[TextureCache] KTX2 candidate for '{}' → '{}'", rq.path, ktxPath.string());
std::ifstream ifs(ktxPath, std::ios::binary);
if (ifs)
{
std::vector<uint8_t> fileBytes(std::istreambuf_iterator<char>(ifs), {});
fmt::println("[TextureCache] KTX2 read {} bytes", fileBytes.size());
KTX2Image ktx{};
std::string err;
if (parse_ktx2(fileBytes.data(), fileBytes.size(), ktx, &err))
{
fmt::println("[TextureCache] KTX2 parsed: format={}, {}x{}, mips={}, faces={}, layers={}, supercompression={}",
string_VkFormat(static_cast<VkFormat>(ktx.format)), ktx.width, ktx.height,
ktx.mipLevels, ktx.faceCount, ktx.layerCount, ktx.supercompression);
size_t sum = 0; for (const auto &lv: ktx.levels) sum += static_cast<size_t>(lv.length);
fmt::println("[TextureCache] KTX2 levels: {} totalBytes={}", ktx.levels.size(), sum);
for (size_t li = 0; li < ktx.levels.size(); ++li)
{
fmt::println(" L{}: off={}, len={}, extent={}x{}", li, ktx.levels[li].offset,
ktx.levels[li].length,
std::max(1u, ktx.width >> li),
std::max(1u, ktx.height >> li));
}
out.isKTX2 = true;
out.ktxFormat = ktx.format;
out.ktxMipLevels = ktx.mipLevels;
out.ktx.bytes = std::move(ktx.data);
out.ktx.levels.reserve(ktx.levels.size());
for (const auto &lv : ktx.levels)
{
out.ktx.levels.push_back({lv.offset, lv.length, lv.width, lv.height});
}
out.width = static_cast<int>(ktx.width);
out.height = static_cast<int>(ktx.height);
}
else
{
fmt::println("[TextureCache] parse_ktx2 failed for '{}' ({} bytes): {}",
ktxPath.string(), fileBytes.size(), err);
}
}
else
{
fmt::println("[TextureCache] Failed to open KTX2 file '{}'", ktxPath.string());
}
}
else if (p.extension() == ".ktx2")
{
fmt::println("[TextureCache] Requested .ktx2 '{}' but file not found (ec={})", p.string(), ec.value());
}
}
// 2) Raster fallback via stb_image if not KTX2 or unsupported
if (!out.isKTX2)
{
int w = 0, h = 0, comp = 0;
unsigned char *data = nullptr;
if (rq.key.kind == TextureKey::SourceKind::FilePath)
{
data = stbi_load(rq.path.c_str(), &w, &h, &comp, 4);
}
else if (!rq.bytes.empty())
{
data = stbi_load_from_memory(rq.bytes.data(), static_cast<int>(rq.bytes.size()), &w, &h, &comp, 4);
}
out.width = w;
out.height = h;
if (data && w > 0 && h > 0)
{
// Progressive downscale if requested
if (_maxUploadDimension > 0 && (w > static_cast<int>(_maxUploadDimension) || h > static_cast<int>(_maxUploadDimension)))
{
std::vector<uint8_t> scaled;
scaled.assign(data, data + static_cast<size_t>(w) * h * 4);
int cw = w, ch = h;
while (cw > static_cast<int>(_maxUploadDimension) || ch > static_cast<int>(_maxUploadDimension))
{
auto tmp = downscale_half(scaled.data(), cw, ch, 4);
scaled.swap(tmp);
cw = std::max(1, cw / 2);
ch = std::max(1, ch / 2);
}
stbi_image_free(data);
out.rgba = std::move(scaled);
out.width = cw;
out.height = ch;
}
else
{
out.heap = data;
out.heapBytes = static_cast<size_t>(w) * static_cast<size_t>(h) * 4u;
}
}
else if (data)
{
stbi_image_free(data);
}
}
{
@@ -447,34 +523,38 @@ size_t TextureCache::drain_ready_uploads(ResourceManager &rm, size_t budgetBytes
{
if (res.handle == InvalidHandle || res.handle >= _entries.size()) continue;
Entry &e = _entries[res.handle];
if (!res.isKTX2 && ((res.heap == nullptr && res.rgba.empty()) || res.width <= 0 || res.height <= 0))
{
e.state = EntryState::Evicted; // failed decode; keep fallback
continue;
}
const uint32_t now = _context ? _context->frameIndex : 0u;
VkExtent3D extent{static_cast<uint32_t>(std::max(0, res.width)), static_cast<uint32_t>(std::max(0, res.height)), 1u};
TextureKey::ChannelsHint hint = (e.key.channels == TextureKey::ChannelsHint::Auto)
? TextureKey::ChannelsHint::Auto
: e.key.channels;
// Estimate resident size for admission control (match post-upload computation)
size_t expectedBytes = 0;
VkFormat fmt = VK_FORMAT_UNDEFINED;
uint32_t desiredLevels = 1;
if (res.isKTX2)
{
fmt = res.ktxFormat;
desiredLevels = res.ktxMipLevels;
for (const auto &lv : res.ktx.levels) expectedBytes += static_cast<size_t>(lv.length);
}
else
{
fmt = choose_format(hint, res.srgb);
if (res.mipmapped)
{
if (res.mipClampLevels > 0) desiredLevels = res.mipClampLevels;
else desiredLevels = static_cast<uint32_t>(std::floor(std::log2(std::max(extent.width, extent.height)))) + 1u;
}
const float mipFactor = res.mipmapped ? mip_factor_for_levels(desiredLevels) : 1.0f;
expectedBytes = static_cast<size_t>(extent.width) * extent.height * bytes_per_texel(fmt) * mipFactor;
}
// Byte budget for this pump (frame)
if (admitted + expectedBytes > budgetBytes)
@@ -503,38 +583,95 @@ size_t TextureCache::drain_ready_uploads(ResourceManager &rm, size_t budgetBytes
}
}
if (res.isKTX2)
{
// Basic format support check: ensure the GPU can sample this format
bool supported = true;
if (_context && _context->getDevice())
{
VkFormatProperties props{};
vkGetPhysicalDeviceFormatProperties(_context->getDevice()->physicalDevice(), fmt, &props);
supported = (props.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) != 0;
}
if (!supported)
{
VkFormatProperties props{};
if (_context && _context->getDevice())
{
vkGetPhysicalDeviceFormatProperties(_context->getDevice()->physicalDevice(), fmt, &props);
}
fmt::println("[TextureCache] Compressed format unsupported: format={} (optimalFeatures=0x{:08x}) — fallback raster for {}",
string_VkFormat(fmt), props.optimalTilingFeatures, e.path);
// Fall back to raster path: requeue by synthesizing a non-KTX result
// Attempt synchronous fallback decode from file if available.
int fw = 0, fh = 0, comp = 0;
unsigned char *fdata = nullptr;
if (e.key.kind == TextureKey::SourceKind::FilePath)
{
fdata = stbi_load(e.path.c_str(), &fw, &fh, &comp, 4);
}
if (!fdata)
{
e.state = EntryState::Evicted;
continue;
}
VkExtent3D fext{ (uint32_t)fw, (uint32_t)fh, 1 };
VkFormat ffmt = choose_format(hint, res.srgb);
uint32_t mips = (res.mipmapped) ? static_cast<uint32_t>(std::floor(std::log2(std::max(fext.width, fext.height)))) + 1u : 1u;
e.image = rm.create_image(fdata, fext, ffmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped, mips);
stbi_image_free(fdata);
e.sizeBytes = static_cast<size_t>(fext.width) * fext.height * bytes_per_texel(ffmt) * (res.mipmapped ? mip_factor_for_levels(mips) : 1.0f);
}
else
{
// Prepare level table for ResourceManager
std::vector<ResourceManager::MipLevelCopy> levels;
levels.reserve(res.ktx.levels.size());
for (const auto &lv : res.ktx.levels)
{
levels.push_back(ResourceManager::MipLevelCopy{ lv.offset, lv.length, lv.width, lv.height });
}
e.image = rm.create_image_compressed(res.ktx.bytes.data(), res.ktx.bytes.size(), fmt, levels);
e.sizeBytes = expectedBytes;
}
}
else
{
// Optionally repack channels to R or RG to save memory
std::vector<uint8_t> packed;
const void *src = nullptr;
if (hint == TextureKey::ChannelsHint::R)
{
packed.resize(static_cast<size_t>(extent.width) * extent.height);
const uint8_t* in = res.heap ? res.heap : res.rgba.data();
for (size_t i = 0, px = static_cast<size_t>(extent.width) * extent.height; i < px; ++i)
{
packed[i] = in[i * 4 + 0];
}
src = packed.data();
}
else if (hint == TextureKey::ChannelsHint::RG)
{
packed.resize(static_cast<size_t>(extent.width) * extent.height * 2);
const uint8_t* in = res.heap ? res.heap : res.rgba.data();
for (size_t i = 0, px = static_cast<size_t>(extent.width) * extent.height; i < px; ++i)
{
packed[i * 2 + 0] = in[i * 4 + 0];
packed[i * 2 + 1] = in[i * 4 + 1];
}
src = packed.data();
}
else
{
src = res.heap ? static_cast<const void *>(res.heap)
: static_cast<const void *>(res.rgba.data());
}
uint32_t mipOverride = (res.mipmapped ? desiredLevels : 1);
e.image = rm.create_image(src, extent, fmt, VK_IMAGE_USAGE_SAMPLED_BIT, res.mipmapped, mipOverride);
e.sizeBytes = expectedBytes;
}
if (vmaDebugEnabled())
{
@@ -542,7 +679,6 @@ size_t TextureCache::drain_ready_uploads(ResourceManager &rm, size_t budgetBytes
vmaSetAllocationName(_context->getDevice()->allocator(), e.image.allocation, name.c_str());
}
_residentBytes += e.sizeBytes;
e.state = EntryState::Resident;
e.nextAttemptFrame = 0; // clear backoff after success
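For a sense of scale behind the two expectedBytes branches above: a back-of-envelope comparison of the admission-control estimate for a 2048x2048 texture, decoded RGBA8 with a generated mip chain versus pre-transcoded BC7 mips summed from the KTX2 level index. Since bytes_per_texel and mip_factor_for_levels are not shown in this diff, the 4 bytes per texel and the 4/3 full-chain factor below are assumptions.

// Rough resident-size estimate for a 2048x2048 albedo texture.
// Assumptions: bytes_per_texel(RGBA8) == 4 and mip_factor_for_levels(full chain) ~= 4/3.
#include <cstdint>
#include <cstdio>
int main()
{
    const uint64_t w = 2048, h = 2048;
    const uint64_t rgba8Base = w * h * 4;          // 16 MiB for the base level
    const uint64_t rgba8Mips = rgba8Base * 4 / 3;  // ~21 MiB with a generated mip chain
    // BC7 KTX2: expectedBytes is just the sum of level lengths from the container,
    // 16 bytes per 4x4 block (1 byte per texel) and no GPU mip generation.
    uint64_t bc7Mips = 0;
    for (uint64_t lw = w, lh = h;; lw = lw > 1 ? lw / 2 : 1, lh = lh > 1 ? lh / 2 : 1)
    {
        bc7Mips += ((lw + 3) / 4) * ((lh + 3) / 4) * 16;
        if (lw == 1 && lh == 1) break;
    }
    std::printf("RGBA8+mips: %llu bytes, BC7 KTX2: %llu bytes\n",
                (unsigned long long)rgba8Mips, (unsigned long long)bc7Mips);
    return 0;
}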

View File

@@ -188,6 +188,17 @@ private:
bool srgb{false};
TextureKey::ChannelsHint channels{TextureKey::ChannelsHint::Auto};
uint32_t mipClampLevels{0};
// Compressed path (KTX2 pre-transcoded BCn). When true, 'rgba/heap'
// are ignored and the fields below describe the payload.
bool isKTX2{false};
VkFormat ktxFormat{VK_FORMAT_UNDEFINED};
uint32_t ktxMipLevels{0};
struct KTXPack {
struct L { uint64_t offset{0}, length{0}; uint32_t width{0}, height{0}; };
std::vector<uint8_t> bytes; // full file content
std::vector<L> levels; // per-mip region description
} ktx;
};
void worker_loop();

View File

@@ -391,22 +391,34 @@ void ResourceManager::process_queued_uploads_immediate()
vkutil::transition_image(cmd, imageUpload.image, imageUpload.initialLayout,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
if (!imageUpload.copies.empty())
{
vkCmdCopyBufferToImage(cmd,
imageUpload.staging.buffer,
imageUpload.image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
static_cast<uint32_t>(imageUpload.copies.size()),
imageUpload.copies.data());
}
else
{
VkBufferImageCopy copyRegion = {};
copyRegion.bufferOffset = 0;
copyRegion.bufferRowLength = 0;
copyRegion.bufferImageHeight = 0;
copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copyRegion.imageSubresource.mipLevel = 0;
copyRegion.imageSubresource.baseArrayLayer = 0;
copyRegion.imageSubresource.layerCount = 1;
copyRegion.imageExtent = imageUpload.extent;
vkCmdCopyBufferToImage(cmd,
imageUpload.staging.buffer,
imageUpload.image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1,
&copyRegion);
}
if (imageUpload.generateMips)
{
@@ -571,26 +583,37 @@ void ResourceManager::register_upload_pass(RenderGraph &graph, FrameResources &f
VkBuffer staging = res.buffer(binding.stagingHandle);
VkImage image = res.image(binding.imageHandle);
if (!upload.copies.empty())
{
vkCmdCopyBufferToImage(cmd, staging, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
static_cast<uint32_t>(upload.copies.size()), upload.copies.data());
}
else
{
VkBufferImageCopy region{};
region.bufferOffset = 0;
region.bufferRowLength = 0;
region.bufferImageHeight = 0;
region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.imageSubresource.mipLevel = 0;
region.imageSubresource.baseArrayLayer = 0;
region.imageSubresource.layerCount = 1;
region.imageExtent = upload.extent;
vkCmdCopyBufferToImage(cmd, staging, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
}
if (upload.generateMips)
{
// NOTE: generate_mipmaps_levels() transitions the image to
// VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL at the end.
// Do not transition back to TRANSFER here.
vkutil::generate_mipmaps_levels(cmd, image, VkExtent2D{upload.extent.width, upload.extent.height},
static_cast<int>(upload.mipLevels));
}
else
{
// Transition to final layout for sampling
vkutil::transition_image(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, upload.finalLayout);
}
}
});
@@ -606,3 +629,63 @@ void ResourceManager::register_upload_pass(RenderGraph &graph, FrameResources &f
}
});
}
AllocatedImage ResourceManager::create_image_compressed(const void* bytes, size_t size,
VkFormat fmt,
std::span<const MipLevelCopy> levels,
VkImageUsageFlags usage)
{
if (bytes == nullptr || size == 0 || levels.empty())
{
return {};
}
// Determine base extent from level 0
VkExtent3D extent{ levels[0].width, levels[0].height, 1 };
// Stage full payload as-is
AllocatedBuffer uploadbuffer = create_buffer(size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VMA_MEMORY_USAGE_CPU_TO_GPU);
std::memcpy(uploadbuffer.info.pMappedData, bytes, size);
vmaFlushAllocation(_deviceManager->allocator(), uploadbuffer.allocation, 0, size);
// Create GPU image with explicit mip count; no mip generation
const uint32_t mipCount = static_cast<uint32_t>(levels.size());
AllocatedImage new_image = create_image(extent, fmt,
usage | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
/*mipmapped=*/true, mipCount);
PendingImageUpload pending{};
pending.staging = uploadbuffer;
pending.image = new_image.image;
pending.extent = extent;
pending.format = fmt;
pending.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
pending.finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
pending.generateMips = false;
pending.mipLevels = mipCount;
pending.copies.reserve(levels.size());
for (uint32_t i = 0; i < mipCount; ++i)
{
VkBufferImageCopy region{};
region.bufferOffset = levels[i].offset;
region.bufferRowLength = 0; // tightly packed
region.bufferImageHeight = 0;
region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.imageSubresource.mipLevel = i;
region.imageSubresource.baseArrayLayer = 0;
region.imageSubresource.layerCount = 1;
region.imageExtent = { levels[i].width, levels[i].height, 1 };
pending.copies.push_back(region);
}
_pendingImageUploads.push_back(std::move(pending));
if (!_deferUploads)
{
process_queued_uploads_immediate();
}
return new_image;
}

View File

@@ -13,6 +13,13 @@ struct FrameResources;
class ResourceManager
{
public:
struct MipLevelCopy
{
uint64_t offset{0};
uint64_t length{0};
uint32_t width{0};
uint32_t height{0};
};
struct BufferCopyRegion
{
VkBuffer destination = VK_NULL_HANDLE;
@@ -37,6 +44,8 @@ public:
VkImageLayout finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
bool generateMips = false;
uint32_t mipLevels = 1;
// For multi-region (per-mip) uploads
std::vector<VkBufferImageCopy> copies;
};
void init(DeviceManager *deviceManager);
@@ -59,6 +68,14 @@ public:
AllocatedImage create_image(const void *data, VkExtent3D size, VkFormat format, VkImageUsageFlags usage,
bool mipmapped, uint32_t mipLevelsOverride);
// Create an image from a compressed payload (e.g., KTX2 pre-transcoded BCn).
// 'bytes' backs a single staging buffer; 'levels' provides per-mip copy regions.
// No GPU mip generation is performed; the number of mips equals levels.size().
AllocatedImage create_image_compressed(const void* bytes, size_t size,
VkFormat fmt,
std::span<const MipLevelCopy> levels,
VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT);
void destroy_image(const AllocatedImage &img) const;
GPUMeshBuffers uploadMesh(std::span<uint32_t> indices, std::span<Vertex> vertices);
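Putting the pieces together from the caller's side, a sketch that parses a .ktx2 file and hands it to create_image_compressed, mirroring what TextureCache::drain_ready_uploads does in this commit; the load_ktx2_image helper, its error handling, and the KTX2Image fields it touches are illustrative rather than part of the commit.

// Hypothetical helper (not in this commit): load a pre-transcoded .ktx2 file
// straight into a sampled image via the new create_image_compressed path.
#include <filesystem>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

AllocatedImage load_ktx2_image(ResourceManager &rm, const std::filesystem::path &path)
{
    std::ifstream ifs(path, std::ios::binary);
    if (!ifs) return {};
    std::vector<uint8_t> fileBytes(std::istreambuf_iterator<char>(ifs), {});

    KTX2Image ktx{};
    std::string err;
    if (!parse_ktx2(fileBytes.data(), fileBytes.size(), ktx, &err)) return {};

    // One MipLevelCopy per mip: byte range inside the staged payload plus the level extent.
    std::vector<ResourceManager::MipLevelCopy> levels;
    levels.reserve(ktx.levels.size());
    for (const auto &lv : ktx.levels)
        levels.push_back({lv.offset, lv.length, lv.width, lv.height});

    // The whole file backs one staging buffer; one VkBufferImageCopy per mip,
    // and no GPU mip generation (the image's mip count equals levels.size()).
    return rm.create_image_compressed(ktx.data.data(), ktx.data.size(), ktx.format, levels);
}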