diff --git a/docs/IBL.md b/docs/IBL.md
index 8995b32..2900170 100644
--- a/docs/IBL.md
+++ b/docs/IBL.md
@@ -13,7 +13,7 @@ Data Flow
 - `VulkanEngine::init_vulkan()` creates an `IBLManager`, calls `init(context)`, and publishes it via `EngineContext::ibl`.
 - The engine optionally loads default IBL assets (`IBLPaths` in `src/core/engine.cpp`), typically a BRDF LUT plus a specular environment `.ktx2`.
 - Loading (IBLManager):
-  - `IBLManager::load(const IBLPaths&)`:
+  - `IBLManager::load(const IBLPaths&)` (synchronous, mostly used in tools/tests):
     - Specular:
       - Tries `ktxutil::load_ktx2_cubemap` first. If successful, uploads via `ResourceManager::create_image_compressed_layers` with `VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT`.
       - If cubemap loading fails, falls back to 2D `.ktx2` via `ktxutil::load_ktx2_2d` and `create_image_compressed`. The image is treated as equirectangular with prefiltered mips.
@@ -28,6 +28,12 @@ Data Flow
     - Loaded as 2D `.ktx2` via `ktxutil::load_ktx2_2d` and uploaded with `create_image_compressed`.
   - Fallbacks:
     - If `diffuseCube` is missing but a specular env exists, `_diff` is aliased to `_spec`.
+  - `IBLManager::load_async(const IBLPaths&)` + `IBLManager::pump_async()` (runtime path used by the engine):
+    - `load_async` runs KTX2 file I/O and the SH bake on a worker thread and stores a prepared CPU-side description (`PreparedIBLData`).
+    - `pump_async` is called on the main thread once per frame (after the previous frame's GPU work has completed) to:
+      - Destroy old IBL images/SH via `destroy_images_and_sh()`.
+      - Create new GPU images with `create_image_compressed(_layers)` and upload the SH buffer.
+    - This avoids stalling the main/game loop when switching IBL volumes or loading the default environment at startup.
 - `IBLManager::unload()` releases GPU images, the SH buffer, and the descriptor set layout.
 - Descriptor layout:
   - `IBLManager::ensureLayout()` builds a descriptor set layout (set=3) with:
diff --git a/docs/TextureLoading.md b/docs/TextureLoading.md
index 404a888..27fe86b 100644
--- a/docs/TextureLoading.md
+++ b/docs/TextureLoading.md
@@ -101,7 +101,7 @@ Image‑Based Lighting (IBL) Textures
 - Specular:
   - If `specularCube` is a cubemap `.ktx2`, `IBLManager` uses `ktxutil::load_ktx2_cubemap` and uploads via `ResourceManager::create_image_compressed_layers`, preserving the file's format and mip chain.
   - If cubemap load fails, it falls back to 2D `.ktx2` via `ktxutil::load_ktx2_2d` + `ResourceManager::create_image_compressed`. The image is treated as equirectangular with prefiltered mips and sampled with explicit LOD in shaders.
-  - If the format is float HDR (`R16G16B16A16_SFLOAT` or `R32G32B32A32_SFLOAT`) and the aspect ratio is 2:1, `IBLManager` additionally computes 2nd‑order SH coefficients (9×`vec3`) on the CPU for diffuse irradiance and uploads them to a UBO (`_shBuffer`).
+  - If the format is float HDR (`R16G16B16A16_SFLOAT` or `R32G32B32A32_SFLOAT`) and the aspect ratio is 2:1, `IBLManager` additionally computes 2nd‑order SH coefficients (9×`vec3`) on a worker thread and uploads them to a UBO (`_shBuffer`) when `pump_async()` is called on the main thread.
 - Diffuse (optional):
   - If `diffuseCube` is provided and valid, it is uploaded as a cubemap using `create_image_compressed_layers`. Current shaders use the SH buffer for diffuse; this cubemap can be wired into a future path if you want to sample it directly.
 - BRDF LUT:
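Both documents describe the same runtime contract, so a compact usage sketch may help. This is illustrative only, using the `IBLManager` API introduced in this patch; the asset paths and the `iblManager` pointer are placeholders, and the call-site placement (after the previous frame's fence wait) mirrors `VulkanEngine::run()` below.

```cpp
// Illustrative sketch of the async IBL contract (not part of the patch).
IBLPaths paths{};
paths.specularCube = "ibl/docklands.ktx2"; // placeholder path
paths.brdfLut2D    = "ibl/brdf_lut.ktx2";  // placeholder path

iblManager->load_async(paths); // KTX2 I/O + SH bake run on the worker thread

// Once per frame, after the previous frame's GPU work has completed:
IBLManager::AsyncResult r = iblManager->pump_async();
if (r.completed && !r.success)
{
    // pump_async() returns before touching GPU state on failure, so the
    // previous environment keeps rendering until a later job succeeds.
}
```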
diff --git a/src/core/assets/ibl_manager.cpp b/src/core/assets/ibl_manager.cpp
index 9ee952a..ceafab2 100644
--- a/src/core/assets/ibl_manager.cpp
+++ b/src/core/assets/ibl_manager.cpp
@@ -8,291 +8,426 @@
 #include <array>
 #include <cmath>
 #include <cstring>
+#include <thread>
+#include <mutex>
+#include <condition_variable>
 #include "core/device/device.h"
 #include "core/assets/texture_cache.h"
 
+struct PreparedIBLData
+{
+    IBLPaths paths{};
+
+    bool has_spec{false};
+    bool spec_is_cubemap{false};
+    ktxutil::KtxCubemap spec_cubemap{};
+    ktxutil::Ktx2D spec_2d{};
+
+    bool has_diffuse{false};
+    ktxutil::KtxCubemap diff_cubemap{};
+
+    bool has_background{false};
+    ktxutil::Ktx2D background_2d{};
+
+    bool has_brdf{false};
+    ktxutil::Ktx2D brdf_2d{};
+
+    bool has_sh{false};
+    glm::vec4 sh[9]{};
+};
+
+namespace
+{
+    static bool compute_sh_from_ktx2_equirect(const char *path, glm::vec4 out_sh[9])
+    {
+        if (path == nullptr) return false;
+
+        ktxTexture2 *ktex = nullptr;
+        if (ktxTexture2_CreateFromNamedFile(path, KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, &ktex) != KTX_SUCCESS || !ktex)
+        {
+            return false;
+        }
+
+        bool ok = false;
+        const VkFormat fmt = static_cast<VkFormat>(ktex->vkFormat);
+        const bool isFloat16 = fmt == VK_FORMAT_R16G16B16A16_SFLOAT;
+        const bool isFloat32 = fmt == VK_FORMAT_R32G32B32A32_SFLOAT;
+        if (!ktxTexture2_NeedsTranscoding(ktex) && (isFloat16 || isFloat32) && ktex->baseWidth == 2 * ktex->baseHeight)
+        {
+            const uint32_t W = ktex->baseWidth;
+            const uint32_t H = ktex->baseHeight;
+            const uint8_t *dataPtr = reinterpret_cast<const uint8_t *>(ktxTexture_GetData(ktxTexture(ktex)));
+
+            struct Vec3
+            {
+                float x, y, z;
+            };
+
+            auto half_to_float = [](uint16_t h) -> float {
+                uint16_t h_exp = (h & 0x7C00u) >> 10;
+                uint16_t h_sig = h & 0x03FFu;
+                uint32_t sign = (h & 0x8000u) << 16;
+                uint32_t f_e, f_sig;
+                if (h_exp == 0)
+                {
+                    if (h_sig == 0)
+                    {
+                        f_e = 0;
+                        f_sig = 0;
+                    }
+                    else
+                    {
+                        int e = -1;
+                        uint16_t sig = h_sig;
+                        while ((sig & 0x0400u) == 0)
+                        {
+                            sig <<= 1;
+                            --e;
+                        }
+                        sig &= 0x03FFu;
+                        f_e = uint32_t(127 - 15 + e) << 23;
+                        f_sig = uint32_t(sig) << 13;
+                    }
+                }
+                else if (h_exp == 0x1Fu)
+                {
+                    f_e = 0xFFu << 23;
+                    f_sig = uint32_t(h_sig) << 13;
+                }
+                else
+                {
+                    f_e = uint32_t(h_exp - 15 + 127) << 23;
+                    f_sig = uint32_t(h_sig) << 13;
+                }
+                uint32_t f = sign | f_e | f_sig;
+                float out;
+                std::memcpy(&out, &f, 4);
+                return out;
+            };
+
+            auto sample_at = [&](uint32_t x, uint32_t y) -> Vec3 {
+                if (isFloat32)
+                {
+                    const float *px = reinterpret_cast<const float *>(dataPtr) + 4ull * (y * W + x);
+                    return {px[0], px[1], px[2]};
+                }
+                else
+                {
+                    const uint16_t *px = reinterpret_cast<const uint16_t *>(dataPtr) + 4ull * (y * W + x);
+                    return {half_to_float(px[0]), half_to_float(px[1]), half_to_float(px[2])};
+                }
+            };
+
+            const float dtheta = float(M_PI) / float(H);
+            const float dphi = 2.f * float(M_PI) / float(W);
+
+            std::array<glm::vec3, 9> c{};
+            for (auto &v : c) v = glm::vec3(0.0f);
+
+            auto sh_basis = [](const glm::vec3 &d) -> std::array<float, 9> {
+                const float x = d.x, y = d.y, z = d.z;
+                const float c0 = 0.2820947918f;
+                const float c1 = 0.4886025119f;
+                const float c2 = 1.0925484306f;
+                const float c3 = 0.3153915653f;
+                const float c4 = 0.5462742153f;
+                return {
+                    c0,
+                    c1 * y,
+                    c1 * z,
+                    c1 * x,
+                    c2 * x * y,
+                    c2 * y * z,
+                    c3 * (3.f * z * z - 1.f),
+                    c2 * x * z,
+                    c4 * (x * x - y * y)
+                };
+            };
+
+            for (uint32_t y = 0; y < H; ++y)
+            {
+                float theta = (y + 0.5f) * dtheta;
+                float sinT = std::sin(theta);
+                for (uint32_t x = 0; x < W; ++x)
+                {
+                    float phi = (x + 0.5f) * dphi;
+                    glm::vec3 dir = glm::vec3(std::cos(phi) * sinT, std::cos(theta), std::sin(phi) * sinT);
+                    auto Lrgb = sample_at(x, y);
+                    glm::vec3 Lvec(Lrgb.x, Lrgb.y, Lrgb.z);
+                    auto Y = sh_basis(dir);
+                    float dOmega = dphi * dtheta * sinT;
+                    for (int i = 0; i < 9; ++i)
+                    {
+                        c[i] += Lvec * (Y[i] * dOmega);
+                    }
+                }
+            }
+
+            const float A0 = float(M_PI);
+            const float A1 = 2.f * float(M_PI) / 3.f;
+            const float A2 = float(M_PI) / 4.f;
+            const float Aband[3] = {A0, A1, A2};
+            for (int i = 0; i < 9; ++i)
+            {
+                int band = (i == 0) ? 0 : (i < 4 ? 1 : 2);
+                c[i] *= Aband[band];
+                out_sh[i] = glm::vec4(c[i], 0.0f);
+            }
+
+            ok = true;
+        }
+
+        ktxTexture_Destroy(ktxTexture(ktex));
+        return ok;
+    }
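Since the Lambert band factors are folded into the stored coefficients above, reconstructing irradiance for a normal is a single dot product against the same nine basis functions. A minimal CPU-side sketch of that evaluation (illustrative, not part of the patch; the shaders are expected to do the equivalent with the `_shBuffer` contents):

```cpp
#include <glm/glm.hpp>

// Evaluate diffuse irradiance from the 9 coefficients produced by
// compute_sh_from_ktx2_equirect (band scales A0..A2 already applied).
glm::vec3 sh_irradiance(const glm::vec4 sh[9], const glm::vec3 &n)
{
    const float x = n.x, y = n.y, z = n.z;
    const float Y[9] = {
        0.2820947918f,                       // Y00
        0.4886025119f * y,                   // Y1-1
        0.4886025119f * z,                   // Y10
        0.4886025119f * x,                   // Y11
        1.0925484306f * x * y,               // Y2-2
        1.0925484306f * y * z,               // Y2-1
        0.3153915653f * (3.f * z * z - 1.f), // Y20
        1.0925484306f * x * z,               // Y21
        0.5462742153f * (x * x - y * y),     // Y22
    };
    glm::vec3 e(0.0f);
    for (int i = 0; i < 9; ++i)
        e += glm::vec3(sh[i]) * Y[i];
    return e; // divide by pi for a Lambertian BRDF's outgoing radiance
}
```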
+
+    static bool prepare_ibl_cpu(const IBLPaths &paths, PreparedIBLData &outData, std::string &outError)
+    {
+        outData = PreparedIBLData{};
+        outData.paths = paths;
+        outError.clear();
+
+        if (!paths.specularCube.empty())
+        {
+            ktxutil::KtxCubemap cube{};
+            if (ktxutil::load_ktx2_cubemap(paths.specularCube.c_str(), cube))
+            {
+                outData.has_spec = true;
+                outData.spec_is_cubemap = true;
+                outData.spec_cubemap = std::move(cube);
+            }
+            else
+            {
+                ktxutil::Ktx2D k2d{};
+                if (ktxutil::load_ktx2_2d(paths.specularCube.c_str(), k2d))
+                {
+                    outData.has_spec = true;
+                    outData.spec_is_cubemap = false;
+                    outData.spec_2d = std::move(k2d);
+
+                    glm::vec4 sh[9]{};
+                    if (compute_sh_from_ktx2_equirect(paths.specularCube.c_str(), sh))
+                    {
+                        outData.has_sh = true;
+                        for (int i = 0; i < 9; ++i)
+                        {
+                            outData.sh[i] = sh[i];
+                        }
+                    }
+                }
+                else
+                {
+                    outError = "Failed to load specular IBL as cubemap or 2D KTX2";
+                }
+            }
+        }
+
+        if (!paths.diffuseCube.empty())
+        {
+            ktxutil::KtxCubemap diff{};
+            if (ktxutil::load_ktx2_cubemap(paths.diffuseCube.c_str(), diff))
+            {
+                outData.has_diffuse = true;
+                outData.diff_cubemap = std::move(diff);
+            }
+        }
+
+        if (!paths.background2D.empty())
+        {
+            ktxutil::Ktx2D bg{};
+            if (ktxutil::load_ktx2_2d(paths.background2D.c_str(), bg))
+            {
+                outData.has_background = true;
+                outData.background_2d = std::move(bg);
+            }
+        }
+
+        if (!paths.brdfLut2D.empty())
+        {
+            ktxutil::Ktx2D lut{};
+            if (ktxutil::load_ktx2_2d(paths.brdfLut2D.c_str(), lut))
+            {
+                outData.has_brdf = true;
+                outData.brdf_2d = std::move(lut);
+            }
+        }
+
+        // Success is defined by having a specular environment; diffuse/background/BRDF are optional.
+        if (!outData.has_spec)
+        {
+            if (outError.empty())
+            {
+                outError = "Specular IBL KTX2 not found or invalid";
+            }
+            return false;
+        }
+        return true;
+    }
+}
+
+struct IBLManager::AsyncStateData
+{
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool shutdown{false};
+
+    bool requestPending{false};
+    IBLPaths requestPaths{};
+    uint64_t requestId{0};
+
+    bool resultReady{false};
+    bool resultSuccess{false};
+    PreparedIBLData readyData{};
+    std::string lastError;
+    uint64_t resultId{0};
+
+    std::thread worker;
+};
+
+IBLManager::~IBLManager()
+{
+    shutdown_async();
+}
+
+void IBLManager::init(EngineContext *ctx)
+{
+    _ctx = ctx;
+
+    if (_async != nullptr)
+    {
+        return;
+    }
+
+    _async = new AsyncStateData();
+    AsyncStateData *state = _async;
+
+    state->worker = std::thread([this, state]() {
+        for (;;)
+        {
+            IBLPaths paths{};
+            uint64_t jobId = 0;
+            {
+                std::unique_lock lock(state->mutex);
+                state->cv.wait(lock, [state]() { return state->shutdown || state->requestPending; });
+                if (state->shutdown)
+                {
+                    break;
+                }
+                paths = state->requestPaths;
+                jobId = state->requestId;
+                state->requestPending = false;
+            }
+
+            PreparedIBLData data{};
+            std::string error;
+            bool ok = prepare_ibl_cpu(paths, data, error);
+
+            {
+                std::lock_guard lock(state->mutex);
+                if (state->shutdown)
+                {
+                    break;
+                }
+                // Drop results for superseded jobs.
+                if (jobId != state->requestId)
+                {
+                    continue;
+                }
+
+                state->readyData = std::move(data);
+                state->lastError = std::move(error);
+                state->resultSuccess = ok;
+                state->resultReady = true;
+                state->resultId = jobId;
+            }
+        }
+    });
+}
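The worker loop above is a single-slot, latest-wins queue: `requestId` stamps each job, the worker re-checks the stamp before publishing, and `load_async` (below) clears `resultReady` so a stale result can never be committed. The same pattern in isolation, as a small runnable sketch (names are mine, not the engine's):

```cpp
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <thread>

// Single-slot, latest-wins job queue: only the newest request's result
// is ever published; superseded results are dropped by the id check.
struct LatestWins
{
    std::mutex m;
    std::condition_variable cv;
    bool shutdown = false, pending = false, ready = false;
    uint64_t requestId = 0;
    int request = 0, result = 0;
};

int main()
{
    LatestWins s;
    std::thread worker([&] {
        for (;;)
        {
            int job = 0;
            uint64_t id = 0;
            {
                std::unique_lock lk(s.m);
                s.cv.wait(lk, [&] { return s.shutdown || s.pending; });
                if (s.shutdown) return;
                job = s.request;
                id = s.requestId;
                s.pending = false;
            }
            int out = job * job; // "expensive" work runs outside the lock
            std::lock_guard lk(s.m);
            if (id != s.requestId) continue; // superseded while working: drop
            s.result = out;
            s.ready = true;
        }
    });

    for (int i = 1; i <= 3; ++i) // rapid submissions coalesce; only the last must win
    {
        std::lock_guard lk(s.m);
        s.request = i;
        ++s.requestId;
        s.pending = true;
        s.ready = false; // invalidate any stale result, like load_async()
        s.cv.notify_one();
    }

    // A real consumer would poll `ready` under the lock, like pump_async().
    {
        std::lock_guard lk(s.m);
        s.shutdown = true;
    }
    s.cv.notify_all();
    worker.join();
    return 0;
}
```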
 bool IBLManager::load(const IBLPaths &paths)
 {
     if (_ctx == nullptr || _ctx->getResources() == nullptr) return false;
-    ResourceManager *rm = _ctx->getResources();
-    // When uploads are deferred into the RenderGraph, any previously queued
-    // image uploads might still reference VkImage handles owned by this
-    // manager. Before destroying or recreating IBL images, flush those
-    // uploads via the immediate path so we never record barriers or copies
-    // for images that have been destroyed.
-    if (rm->deferred_uploads() && rm->has_pending_uploads())
+    PreparedIBLData data{};
+    std::string error;
+    if (!prepare_ibl_cpu(paths, data, error))
     {
-        rm->process_queued_uploads_immediate();
-    }
-
-    // Allow reloading at runtime: destroy previous images/SH but keep layout.
-    destroy_images_and_sh();
-    ensureLayout();
-
-    // Load specular environment: prefer cubemap; fallback to 2D equirect with mips.
-    // Also hint the TextureCache (if present) so future switches are cheap.
-    if (!paths.specularCube.empty())
-    {
-        // Try as cubemap first
-        ktxutil::KtxCubemap kcm{};
-        if (ktxutil::load_ktx2_cubemap(paths.specularCube.c_str(), kcm))
+        if (!error.empty())
         {
-            _spec = rm->create_image_compressed_layers(
-                kcm.bytes.data(), kcm.bytes.size(),
-                kcm.fmt, kcm.mipLevels, kcm.layers,
-                kcm.copies,
-                VK_IMAGE_USAGE_SAMPLED_BIT,
-                kcm.imgFlags
-            );
+            fmt::println("[IBL] load failed: {}", error);
         }
-        else
+        return false;
+    }
+
+    return commit_prepared(data);
+}
+
+bool IBLManager::load_async(const IBLPaths &paths)
+{
+    if (_ctx == nullptr || _ctx->getResources() == nullptr)
+    {
+        return false;
+    }
+
+    if (_async == nullptr)
+    {
+        init(_ctx);
+    }
+
+    AsyncStateData *state = _async;
+    {
+        std::lock_guard lock(state->mutex);
+        state->requestPaths = paths;
+        state->requestPending = true;
+        state->requestId++;
+        // Invalidate any previous ready result; it will be superseded by this job.
+        state->resultReady = false;
+    }
+    state->cv.notify_one();
+    return true;
+}
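One consequence of the `requestId++` / `resultReady = false` pair in `load_async` is that back-to-back requests coalesce, so callers never need to debounce. Illustrative only (the `volumeA`/`volumeB` objects are hypothetical):

```cpp
ibl->load_async(volumeA.paths); // may be superseded before it finishes
ibl->load_async(volumeB.paths); // bumps requestId; A's result will be dropped

// Later, on the main thread: commits B's images once ready, never A's.
IBLManager::AsyncResult r = ibl->pump_async();
```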
+IBLManager::AsyncResult IBLManager::pump_async()
+{
+    AsyncResult out{};
+
+    if (_async == nullptr || _ctx == nullptr || _ctx->getResources() == nullptr)
+    {
+        return out;
+    }
+
+    AsyncStateData *state = _async;
+
+    PreparedIBLData data{};
+    bool success = false;
+    {
+        std::lock_guard lock(state->mutex);
+        if (!state->resultReady)
         {
-            ktxutil::Ktx2D k2d{};
-            if (ktxutil::load_ktx2_2d(paths.specularCube.c_str(), k2d))
-            {
-                std::vector<ResourceManager::MipLevelCopy> lv;
-                lv.reserve(k2d.mipLevels);
-                for (uint32_t mip = 0; mip < k2d.mipLevels; ++mip)
-                {
-                    const auto &r = k2d.copies[mip];
-                    lv.push_back(ResourceManager::MipLevelCopy{
-                        .offset = r.bufferOffset,
-                        .length = 0,
-                        .width = r.imageExtent.width,
-                        .height = r.imageExtent.height,
-                    });
-                }
-                _spec = rm->create_image_compressed(k2d.bytes.data(), k2d.bytes.size(), k2d.fmt, lv,
-                                                    VK_IMAGE_USAGE_SAMPLED_BIT);
-
-                ktxTexture2 *ktex = nullptr;
-                if (ktxTexture2_CreateFromNamedFile(paths.specularCube.c_str(), KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT,
-                                                    &ktex) == KTX_SUCCESS && ktex)
-                {
-                    const VkFormat fmt = static_cast<VkFormat>(ktex->vkFormat);
-                    const bool isFloat16 = fmt == VK_FORMAT_R16G16B16A16_SFLOAT;
-                    const bool isFloat32 = fmt == VK_FORMAT_R32G32B32A32_SFLOAT;
-                    if (!ktxTexture2_NeedsTranscoding(ktex) && (isFloat16 || isFloat32) && ktex->baseWidth == 2 * ktex->
-                        baseHeight)
-                    {
-                        const uint32_t W = ktex->baseWidth;
-                        const uint32_t H = ktex->baseHeight;
-                        const uint8_t *dataPtr = reinterpret_cast<const uint8_t *>(
-                            ktxTexture_GetData(ktxTexture(ktex)));
-
-                        // Compute 9 SH coefficients (irradiance) from equirect HDR
-                        struct Vec3
-                        {
-                            float x, y, z;
-                        };
-                        auto half_to_float = [](uint16_t h)-> float {
-                            uint16_t h_exp = (h & 0x7C00u) >> 10;
-                            uint16_t h_sig = h & 0x03FFu;
-                            uint32_t sign = (h & 0x8000u) << 16;
-                            uint32_t f_e, f_sig;
-                            if (h_exp == 0)
-                            {
-                                if (h_sig == 0)
-                                {
-                                    f_e = 0;
-                                    f_sig = 0;
-                                }
-                                else
-                                {
-                                    // subnormals
-                                    int e = -1;
-                                    uint16_t sig = h_sig;
-                                    while ((sig & 0x0400u) == 0)
-                                    {
-                                        sig <<= 1;
-                                        --e;
-                                    }
-                                    sig &= 0x03FFu;
-                                    f_e = uint32_t(127 - 15 + e) << 23;
-                                    f_sig = uint32_t(sig) << 13;
-                                }
-                            }
-                            else if (h_exp == 0x1Fu)
-                            {
-                                f_e = 0xFFu << 23;
-                                f_sig = uint32_t(h_sig) << 13;
-                            }
-                            else
-                            {
-                                f_e = uint32_t(h_exp - 15 + 127) << 23;
-                                f_sig = uint32_t(h_sig) << 13;
-                            }
-                            uint32_t f = sign | f_e | f_sig;
-                            float out;
-                            std::memcpy(&out, &f, 4);
-                            return out;
-                        };
-
-                        auto sample_at = [&](uint32_t x, uint32_t y)-> Vec3 {
-                            if (isFloat32)
-                            {
-                                const float *px = reinterpret_cast<const float *>(dataPtr) + 4ull * (y * W + x);
-                                return {px[0], px[1], px[2]};
-                            }
-                            else
-                            {
-                                const uint16_t *px = reinterpret_cast<const uint16_t *>(dataPtr) + 4ull * (y * W + x);
-                                return {half_to_float(px[0]), half_to_float(px[1]), half_to_float(px[2])};
-                            }
-                        };
-
-                        constexpr int L = 2; // 2nd order (9 coeffs)
-                        const float dtheta = float(M_PI) / float(H);
-                        const float dphi = 2.f * float(M_PI) / float(W);
-                        // Accumulate RGB SH coeffs
-                        std::array<glm::vec3, 9> c{};
-                        for (auto &v: c) v = glm::vec3(0);
-
-                        auto sh_basis = [](const glm::vec3 &d)-> std::array<float, 9> {
-                            const float x = d.x, y = d.y, z = d.z;
-                            // Real SH, unnormalized constants
-                            const float c0 = 0.2820947918f;
-                            const float c1 = 0.4886025119f;
-                            const float c2 = 1.0925484306f;
-                            const float c3 = 0.3153915653f;
-                            const float c4 = 0.5462742153f;
-                            return {
-                                c0,
-                                c1 * y,
-                                c1 * z,
-                                c1 * x,
-                                c2 * x * y,
-                                c2 * y * z,
-                                c3 * (3.f * z * z - 1.f),
-                                c2 * x * z,
-                                c4 * (x * x - y * y)
-                            };
-                        };
-
-                        for (uint32_t y = 0; y < H; ++y)
-                        {
-                            float theta = (y + 0.5f) * dtheta; // [0,pi]
-                            float sinT = std::sin(theta);
-                            for (uint32_t x = 0; x < W; ++x)
-                            {
-                                float phi = (x + 0.5f) * dphi; // [0,2pi]
-                                glm::vec3 dir = glm::vec3(std::cos(phi) * sinT, std::cos(theta), std::sin(phi) * sinT);
-                                auto Lrgb = sample_at(x, y);
-                                glm::vec3 Lvec(Lrgb.x, Lrgb.y, Lrgb.z);
-                                auto Y = sh_basis(dir);
-                                float dOmega = dphi * dtheta * sinT; // solid angle per pixel
-                                for (int i = 0; i < 9; ++i)
-                                {
-                                    c[i] += Lvec * (Y[i] * dOmega);
-                                }
-                            }
-                        }
-                        // Convolve with Lambert kernel via per-band scale
-                        const float A0 = float(M_PI);
-                        const float A1 = 2.f * float(M_PI) / 3.f;
-                        const float A2 = float(M_PI) / 4.f;
-                        const float Aband[3] = {A0, A1, A2};
-                        for (int i = 0; i < 9; ++i)
-                        {
-                            int band = (i == 0) ? 0 : (i < 4 ? 1 : 2);
-                            c[i] *= Aband[band];
-                        }
-
-                        _shBuffer = rm->create_buffer(sizeof(glm::vec4) * 9, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
-                                                      VMA_MEMORY_USAGE_CPU_TO_GPU);
-                        for (int i = 0; i < 9; ++i)
-                        {
-                            glm::vec4 v(c[i], 0.0f);
-                            std::memcpy(reinterpret_cast<uint8_t *>(_shBuffer.info.pMappedData) + i * sizeof(glm::vec4),
-                                        &v, sizeof(glm::vec4));
-                        }
-                        vmaFlushAllocation(_ctx->getDevice()->allocator(), _shBuffer.allocation, 0,
-                                           sizeof(glm::vec4) * 9);
-                    }
-                    ktxTexture_Destroy(ktxTexture(ktex));
-                }
-            }
+            return out;
         }
+        data = std::move(state->readyData);
+        success = state->resultSuccess;
+        state->resultReady = false;
     }
 
-    // Diffuse cubemap (optional; if missing, reuse specular)
-    if (!paths.diffuseCube.empty())
+    out.completed = true;
+    if (!success)
     {
-        ktxutil::KtxCubemap kcm{};
-        if (ktxutil::load_ktx2_cubemap(paths.diffuseCube.c_str(), kcm))
-        {
-            _diff = rm->create_image_compressed_layers(
-                kcm.bytes.data(), kcm.bytes.size(),
-                kcm.fmt, kcm.mipLevels, kcm.layers,
-                kcm.copies,
-                VK_IMAGE_USAGE_SAMPLED_BIT,
-                kcm.imgFlags
-            );
-        }
-    }
-    if (_diff.image == VK_NULL_HANDLE && _spec.image != VK_NULL_HANDLE)
-    {
-        _diff = _spec;
+        out.success = false;
+        return out;
     }
 
-    if (!paths.background2D.empty())
-    {
-        ktxutil::Ktx2D bg{};
-        if (ktxutil::load_ktx2_2d(paths.background2D.c_str(), bg))
-        {
-            std::vector<ResourceManager::MipLevelCopy> lv;
-            lv.reserve(bg.mipLevels);
-            for (uint32_t mip = 0; mip < bg.mipLevels; ++mip)
-            {
-                const auto &r = bg.copies[mip];
-                lv.push_back(ResourceManager::MipLevelCopy{
-                    .offset = r.bufferOffset,
-                    .length = 0,
-                    .width = r.imageExtent.width,
-                    .height = r.imageExtent.height,
-                });
-            }
-            _background = rm->create_image_compressed(
-                bg.bytes.data(), bg.bytes.size(), bg.fmt, lv,
-                VK_IMAGE_USAGE_SAMPLED_BIT);
-        }
-    }
-
-    if (_background.image == VK_NULL_HANDLE && _spec.image != VK_NULL_HANDLE)
-    {
-        _background = _spec;
-    }
-
-    // BRDF LUT
-    if (!paths.brdfLut2D.empty())
-    {
-        ktxutil::Ktx2D lut{};
-        if (ktxutil::load_ktx2_2d(paths.brdfLut2D.c_str(), lut))
-        {
-            std::vector<ResourceManager::MipLevelCopy> lv;
-            lv.reserve(lut.mipLevels);
-            for (uint32_t mip = 0; mip < lut.mipLevels; ++mip)
-            {
-                const auto &r = lut.copies[mip];
-                lv.push_back(ResourceManager::MipLevelCopy{
-                    .offset = r.bufferOffset,
-                    .length = 0,
-                    .width = r.imageExtent.width,
-                    .height = r.imageExtent.height,
-                });
-            }
-            _brdf = rm->create_image_compressed(lut.bytes.data(), lut.bytes.size(), lut.fmt, lv,
-                                                VK_IMAGE_USAGE_SAMPLED_BIT);
-        }
-    }
-
-    return (_spec.image != VK_NULL_HANDLE) && (_diff.image != VK_NULL_HANDLE);
+    // Commit GPU resources on the main thread.
+    out.success = commit_prepared(data);
+    return out;
 }
 
 void IBLManager::unload()
 {
+    shutdown_async();
+
     if (_ctx == nullptr || _ctx->getResources() == nullptr) return;
     // Destroy images and SH buffer first.
@@ -363,3 +498,150 @@ void IBLManager::destroy_images_and_sh()
     _background = {};
     _brdf = {};
 }
+
+void IBLManager::shutdown_async()
+{
+    if (_async == nullptr) return;
+
+    AsyncStateData *state = _async;
+    {
+        std::lock_guard lock(state->mutex);
+        state->shutdown = true;
+        state->requestPending = false;
+    }
+    state->cv.notify_all();
+    if (state->worker.joinable())
+    {
+        state->worker.join();
+    }
+
+    delete _async;
+    _async = nullptr;
+}
+
+bool IBLManager::commit_prepared(const PreparedIBLData &data)
+{
+    if (_ctx == nullptr || _ctx->getResources() == nullptr)
+    {
+        return false;
+    }
+
+    ResourceManager *rm = _ctx->getResources();
+
+    if (rm->deferred_uploads() && rm->has_pending_uploads())
+    {
+        rm->process_queued_uploads_immediate();
+    }
+
+    destroy_images_and_sh();
+    ensureLayout();
+
+    if (data.has_spec)
+    {
+        if (data.spec_is_cubemap)
+        {
+            const auto &kcm = data.spec_cubemap;
+            _spec = rm->create_image_compressed_layers(
+                kcm.bytes.data(), kcm.bytes.size(),
+                kcm.fmt, kcm.mipLevels, kcm.layers,
+                kcm.copies,
+                VK_IMAGE_USAGE_SAMPLED_BIT,
+                kcm.imgFlags);
+        }
+        else
+        {
+            const auto &k2d = data.spec_2d;
+            std::vector<ResourceManager::MipLevelCopy> lv;
+            lv.reserve(k2d.mipLevels);
+            for (uint32_t mip = 0; mip < k2d.mipLevels; ++mip)
+            {
+                const auto &r = k2d.copies[mip];
+                lv.push_back(ResourceManager::MipLevelCopy{
+                    .offset = r.bufferOffset,
+                    .length = 0,
+                    .width = r.imageExtent.width,
+                    .height = r.imageExtent.height,
+                });
+            }
+            _spec = rm->create_image_compressed(
+                k2d.bytes.data(), k2d.bytes.size(), k2d.fmt, lv,
+                VK_IMAGE_USAGE_SAMPLED_BIT);
+
+            if (data.has_sh)
+            {
+                _shBuffer = rm->create_buffer(sizeof(glm::vec4) * 9,
+                                              VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                                              VMA_MEMORY_USAGE_CPU_TO_GPU);
+                for (int i = 0; i < 9; ++i)
+                {
+                    std::memcpy(reinterpret_cast<uint8_t *>(_shBuffer.info.pMappedData) + i * sizeof(glm::vec4),
+                                &data.sh[i], sizeof(glm::vec4));
+                }
+                vmaFlushAllocation(_ctx->getDevice()->allocator(), _shBuffer.allocation, 0,
+                                   sizeof(glm::vec4) * 9);
+            }
+        }
+    }
+
+    if (data.has_diffuse)
+    {
+        const auto &kcm = data.diff_cubemap;
+        _diff = rm->create_image_compressed_layers(
+            kcm.bytes.data(), kcm.bytes.size(),
+            kcm.fmt, kcm.mipLevels, kcm.layers,
+            kcm.copies,
+            VK_IMAGE_USAGE_SAMPLED_BIT,
+            kcm.imgFlags);
+    }
+    if (_diff.image == VK_NULL_HANDLE && _spec.image != VK_NULL_HANDLE)
+    {
+        _diff = _spec;
+    }
+
+    if (data.has_background)
+    {
+        const auto &bg = data.background_2d;
+        std::vector<ResourceManager::MipLevelCopy> lv;
+        lv.reserve(bg.mipLevels);
+        for (uint32_t mip = 0; mip < bg.mipLevels; ++mip)
+        {
+            const auto &r = bg.copies[mip];
+            lv.push_back(ResourceManager::MipLevelCopy{
+                .offset = r.bufferOffset,
+                .length = 0,
+                .width = r.imageExtent.width,
+                .height = r.imageExtent.height,
+            });
+        }
+        _background = rm->create_image_compressed(
+            bg.bytes.data(), bg.bytes.size(), bg.fmt, lv,
+            VK_IMAGE_USAGE_SAMPLED_BIT);
+    }
+
+    if (_background.image == VK_NULL_HANDLE && _spec.image != VK_NULL_HANDLE)
+    {
+        _background = _spec;
+    }
+
+    if (data.has_brdf)
+    {
+        const auto &lut = data.brdf_2d;
+        std::vector<ResourceManager::MipLevelCopy> lv;
+        lv.reserve(lut.mipLevels);
+        for (uint32_t mip = 0; mip < lut.mipLevels; ++mip)
+        {
+            const auto &r = lut.copies[mip];
+            lv.push_back(ResourceManager::MipLevelCopy{
+                .offset = r.bufferOffset,
+                .length = 0,
+                .width = r.imageExtent.width,
+                .height = r.imageExtent.height,
+            });
+        }
+        _brdf = rm->create_image_compressed(
+            lut.bytes.data(), lut.bytes.size(), lut.fmt, lv,
+            VK_IMAGE_USAGE_SAMPLED_BIT);
+    }
+
+    return (_spec.image != VK_NULL_HANDLE) && (_diff.image != VK_NULL_HANDLE);
+}
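`commit_prepared` builds the same `MipLevelCopy` vector three times (specular 2D fallback, background, BRDF LUT). A possible cleanup, sketched with a hypothetical `make_mip_copies` helper that is not part of this patch (the `ResourceManager`/`ktxutil` types come from this repo):

```cpp
// Hypothetical helper (not in this patch): map a ktxutil 2D mip chain to
// the MipLevelCopy entries expected by ResourceManager::create_image_compressed.
static std::vector<ResourceManager::MipLevelCopy> make_mip_copies(const ktxutil::Ktx2D &tex)
{
    std::vector<ResourceManager::MipLevelCopy> lv;
    lv.reserve(tex.mipLevels);
    for (uint32_t mip = 0; mip < tex.mipLevels; ++mip)
    {
        const auto &r = tex.copies[mip];
        lv.push_back(ResourceManager::MipLevelCopy{
            .offset = r.bufferOffset,
            .length = 0, // matches the existing call sites above
            .width = r.imageExtent.width,
            .height = r.imageExtent.height,
        });
    }
    return lv;
}

// e.g.: _brdf = rm->create_image_compressed(lut.bytes.data(), lut.bytes.size(),
//                                           lut.fmt, make_mip_copies(lut),
//                                           VK_IMAGE_USAGE_SAMPLED_BIT);
```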
diff --git a/src/core/assets/ibl_manager.h b/src/core/assets/ibl_manager.h
index 0db490c..1eb72d7 100644
--- a/src/core/assets/ibl_manager.h
+++ b/src/core/assets/ibl_manager.h
@@ -7,6 +7,8 @@
 class TextureCache;
 class EngineContext;
 
+struct PreparedIBLData;
+
 struct IBLPaths
 {
     std::string specularCube; // .ktx2 (GPU-ready BC6H or R16G16B16A16)
@@ -20,13 +22,35 @@ struct IBLPaths
 class IBLManager
 {
 public:
-    void init(EngineContext *ctx) { _ctx = ctx; }
+    IBLManager() = default;
+    ~IBLManager();
+
+    void init(EngineContext *ctx);
     void set_texture_cache(TextureCache *cache) { _cache = cache; }
 
     // Load all three textures. Returns true when specular+diffuse (and optional LUT) are resident.
     bool load(const IBLPaths &paths);
 
+    // Asynchronous IBL load:
+    // - Performs KTX2 file I/O and the SH bake on a background thread.
+    // - GPU image creation and the SH upload are deferred to pump_async() on the main thread.
+    // Returns false if the job could not be queued.
+    bool load_async(const IBLPaths &paths);
+
+    struct AsyncResult
+    {
+        // True when an async job finished since the last pump_async() call.
+        bool completed{false};
+        // True when the finished job successfully produced new GPU IBL resources.
+        bool success{false};
+    };
+
+    // Main-thread integration: if a completed async job is pending, destroy the
+    // previous IBL images/SH and upload the new ones. Must be called only after
+    // the previous frame's GPU work has completed.
+    AsyncResult pump_async();
+
     // Release GPU memory and patch to fallbacks handled by the caller.
     void unload();
 
@@ -57,6 +81,13 @@ private:
     VkDescriptorSetLayout _iblSetLayout = VK_NULL_HANDLE;
     AllocatedBuffer _shBuffer{}; // 9*vec4 coefficients (RGB in .xyz)
 
+    struct AsyncStateData;
+    AsyncStateData *_async{nullptr};
+
+    bool commit_prepared(const PreparedIBLData &data);
+
     // Destroy current GPU images/SH buffer but keep descriptor layout alive.
     void destroy_images_and_sh();
+
+    void shutdown_async();
 };
diff --git a/src/core/engine.cpp b/src/core/engine.cpp
index 121cfca..232e047 100644
--- a/src/core/engine.cpp
+++ b/src/core/engine.cpp
@@ -250,7 +250,7 @@ void VulkanEngine::init()
     // Publish to context for passes and pipeline layout assembly
     _context->ibl = _iblManager.get();
 
-    // Try to load default IBL assets if present
+    // Try to load default IBL assets if present (async)
     {
         IBLPaths ibl{};
         ibl.specularCube = _assetManager->assetPath("ibl/docklands.ktx2");
@@ -262,13 +262,21 @@ void VulkanEngine::init()
         // Treat this as the global/fallback IBL used outside any local volume.
         _globalIBLPaths = ibl;
         _activeIBLVolume = -1;
-        bool ibl_ok = _iblManager->load(ibl);
-        _hasGlobalIBL = ibl_ok;
-        if (!ibl_ok)
+        _hasGlobalIBL = false;
+        if (_iblManager)
         {
-            fmt::println("[Engine] Warning: failed to load default IBL (specular='{}', brdfLut='{}'). IBL lighting will be disabled until a valid IBL is loaded.",
-                         ibl.specularCube,
-                         ibl.brdfLut2D);
+            if (_iblManager->load_async(ibl))
+            {
+                _pendingIBLRequest.active = true;
+                _pendingIBLRequest.targetVolume = -1;
+                _pendingIBLRequest.paths = ibl;
+            }
+            else
+            {
+                fmt::println("[Engine] Warning: failed to enqueue default IBL load (specular='{}', brdfLut='{}'). IBL lighting will be disabled until a valid IBL is loaded.",
+                             ibl.specularCube,
+                             ibl.brdfLut2D);
+            }
         }
     }
 
@@ -436,6 +444,56 @@ bool VulkanEngine::addGLTFInstance(const std::string &instanceName,
     return true;
 }
 
+bool VulkanEngine::addPrimitiveInstance(const std::string &instanceName,
+                                        AssetManager::MeshGeometryDesc::Type geomType,
+                                        const glm::mat4 &transform,
+                                        const AssetManager::MeshMaterialDesc &material,
+                                        std::optional boundsTypeOverride)
+{
+    if (!_assetManager || !_sceneManager)
+    {
+        return false;
+    }
+
+    // Build a cache key for the primitive mesh so multiple instances
+    // share the same GPU buffers.
+    std::string meshName;
+    switch (geomType)
+    {
+        case AssetManager::MeshGeometryDesc::Type::Cube:
+            meshName = "Primitive.Cube";
+            break;
+        case AssetManager::MeshGeometryDesc::Type::Sphere:
+            meshName = "Primitive.Sphere";
+            break;
+        case AssetManager::MeshGeometryDesc::Type::Plane:
+            meshName = "Primitive.Plane";
+            break;
+        case AssetManager::MeshGeometryDesc::Type::Capsule:
+            meshName = "Primitive.Capsule";
+            break;
+        case AssetManager::MeshGeometryDesc::Type::Provided:
+        default:
+            // Provided geometry requires explicit vertex/index data; not supported here.
+            return false;
+    }
+
+    AssetManager::MeshCreateInfo ci{};
+    ci.name = meshName;
+    ci.geometry.type = geomType;
+    ci.material = material;
+    ci.boundsType = boundsTypeOverride;
+
+    auto mesh = _assetManager->createMesh(ci);
+    if (!mesh)
+    {
+        return false;
+    }
+
+    _sceneManager->addMeshInstance(instanceName, mesh, transform, boundsTypeOverride);
+    return true;
+}
+
 uint32_t VulkanEngine::loadGLTFAsync(const std::string &sceneName,
                                      const std::string &modelRelativePath,
                                      const glm::mat4 &transform,
@@ -627,6 +685,7 @@ void VulkanEngine::draw()
                 break;
             }
         }
+
         if (newVolume != _activeIBLVolume)
         {
             const IBLPaths *paths = nullptr;
@@ -639,17 +698,25 @@ void VulkanEngine::draw()
                 paths = &_globalIBLPaths;
             }
 
-            if (paths)
+            // Avoid enqueueing duplicate jobs for the same target volume.
+            const bool alreadyPendingForTarget =
+                _pendingIBLRequest.active && _pendingIBLRequest.targetVolume == newVolume;
+
+            if (paths && !alreadyPendingForTarget)
             {
-                bool ibl_ok = _iblManager->load(*paths);
-                if (!ibl_ok)
+                if (_iblManager->load_async(*paths))
                 {
-                    fmt::println("[Engine] Warning: failed to load IBL for {} (specular='{}')",
+                    _pendingIBLRequest.active = true;
+                    _pendingIBLRequest.targetVolume = newVolume;
+                    _pendingIBLRequest.paths = *paths;
+                }
+                else
+                {
+                    fmt::println("[Engine] Warning: failed to enqueue IBL load for {} (specular='{}')",
                                  (newVolume >= 0) ? "volume" : "global environment",
                                  paths->specularCube);
                 }
             }
-            _activeIBLVolume = newVolume;
         }
     }
@@ -1118,6 +1185,33 @@ void VulkanEngine::run()
             // Safe to destroy any BLAS queued for deletion now that the previous frame is idle.
             if (_rayManager) { _rayManager->flushPendingDeletes(); }
 
+            // Commit any completed async IBL load now that the GPU is idle.
+            if (_iblManager && _pendingIBLRequest.active)
+            {
+                IBLManager::AsyncResult iblRes = _iblManager->pump_async();
+                if (iblRes.completed)
+                {
+                    if (iblRes.success)
+                    {
+                        if (_pendingIBLRequest.targetVolume >= 0)
+                        {
+                            _activeIBLVolume = _pendingIBLRequest.targetVolume;
+                        }
+                        else
+                        {
+                            _activeIBLVolume = -1;
+                            _hasGlobalIBL = true;
+                        }
+                    }
+                    else
+                    {
+                        fmt::println("[Engine] Warning: async IBL load failed (specular='{}')",
+                                     _pendingIBLRequest.paths.specularCube);
+                    }
+                    _pendingIBLRequest.active = false;
+                }
+            }
+
             if (_pickResultPending && _pickReadbackBuffer.buffer && _sceneManager)
             {
                 vmaInvalidateAllocation(_deviceManager->allocator(), _pickReadbackBuffer.allocation, 0, sizeof(uint32_t));
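A usage sketch for the new `addPrimitiveInstance` entry point (illustrative; the instance name and transform are arbitrary, and `glm::translate` needs `<glm/gtc/matrix_transform.hpp>`):

```cpp
#include <glm/gtc/matrix_transform.hpp>

// Spawn a sphere two units up; repeated calls reuse the cached
// "Primitive.Sphere" mesh, so instances share GPU buffers.
engine.addPrimitiveInstance(
    "DebugSphere.0",
    AssetManager::MeshGeometryDesc::Type::Sphere,
    glm::translate(glm::mat4(1.f), glm::vec3(0.f, 2.f, 0.f)));
// Default material and bounds; returns false for Type::Provided, which
// requires explicit vertex/index data.
```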
diff --git a/src/core/engine.h b/src/core/engine.h
index 6263b6d..ab8aa97 100644
--- a/src/core/engine.h
+++ b/src/core/engine.h
@@ -133,6 +133,13 @@ public:
     // User-defined local IBL volumes and currently active index (-1 = global).
     std::vector _iblVolumes;
     int _activeIBLVolume{-1};
+    // Pending async IBL request (global or volume). targetVolume = -1 means global.
+    struct PendingIBLRequest
+    {
+        bool active{false};
+        int targetVolume{-1};
+        IBLPaths paths{};
+    } _pendingIBLRequest;
 
     struct PickInfo
     {
@@ -206,6 +213,20 @@ public:
                          const glm::mat4 &transform = glm::mat4(1.f),
                          bool preloadTextures = false);
 
+    // Spawn a runtime primitive mesh instance (cube/sphere/plane/capsule).
+    // - instanceName is the unique key for this object in SceneManager.
+    // - geomType selects which analytic primitive to build.
+    // - material controls whether the primitive uses the default PBR material
+    //   or a textured material (see AssetManager::MeshMaterialDesc).
+    // - boundsTypeOverride can force a specific bounds type for picking.
+    // The underlying mesh is cached in AssetManager using a per-primitive name,
+    // so multiple instances share GPU buffers.
+    bool addPrimitiveInstance(const std::string &instanceName,
+                              AssetManager::MeshGeometryDesc::Type geomType,
+                              const glm::mat4 &transform = glm::mat4(1.f),
+                              const AssetManager::MeshMaterialDesc &material = {},
+                              std::optional boundsTypeOverride = {});
+
     // Asynchronous glTF load that reports progress via AsyncAssetLoader.
     // Returns a JobID that can be queried via AsyncAssetLoader.
     // If preloadTextures is true, textures will be immediately marked for loading to VRAM.
diff --git a/src/core/engine_ui.cpp b/src/core/engine_ui.cpp
index ec72605..b88b6cb 100644
--- a/src/core/engine_ui.cpp
+++ b/src/core/engine_ui.cpp
@@ -242,19 +242,27 @@ namespace
             {
                 if (eng->_iblManager && vol.enabled)
                 {
-                    eng->_iblManager->load(vol.paths);
-                    eng->_activeIBLVolume = static_cast<int>(i);
+                    if (eng->_iblManager->load_async(vol.paths))
+                    {
+                        eng->_pendingIBLRequest.active = true;
+                        eng->_pendingIBLRequest.targetVolume = static_cast<int>(i);
+                        eng->_pendingIBLRequest.paths = vol.paths;
+                    }
                 }
             }
             ImGui::SameLine();
             if (ImGui::Button("Set As Global IBL"))
             {
                 eng->_globalIBLPaths = vol.paths;
-                eng->_hasGlobalIBL = true;
-                eng->_activeIBLVolume = -1;
                 if (eng->_iblManager)
                 {
-                    eng->_iblManager->load(eng->_globalIBLPaths);
+                    if (eng->_iblManager->load_async(eng->_globalIBLPaths))
+                    {
+                        eng->_pendingIBLRequest.active = true;
+                        eng->_pendingIBLRequest.targetVolume = -1;
+                        eng->_pendingIBLRequest.paths = eng->_globalIBLPaths;
+                        eng->_hasGlobalIBL = false;
+                    }
                 }
             }