diff --git a/docs/IBL.md b/docs/IBL.md
index 8995b32..2900170 100644
--- a/docs/IBL.md
+++ b/docs/IBL.md
@@ -13,7 +13,7 @@ Data Flow
 - `VulkanEngine::init_vulkan()` creates an `IBLManager`, calls `init(context)`, and publishes it via `EngineContext::ibl`.
 - The engine optionally loads default IBL assets (`IBLPaths` in `src/core/engine.cpp`), typically a BRDF LUT plus a specular environment `.ktx2`.
 - Loading (IBLManager):
-  - `IBLManager::load(const IBLPaths&)`:
+  - `IBLManager::load(const IBLPaths&)` (synchronous, mostly used in tools/tests):
     - Specular:
       - Tries `ktxutil::load_ktx2_cubemap` first. If successful, uploads via `ResourceManager::create_image_compressed_layers` with `VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT`.
       - If cubemap loading fails, falls back to 2D `.ktx2` via `ktxutil::load_ktx2_2d` and `create_image_compressed`. The image is treated as equirectangular with prefiltered mips.
@@ -28,6 +28,12 @@ Data Flow
     - Loaded as 2D `.ktx2` via `ktxutil::load_ktx2_2d` and uploaded with `create_image_compressed`.
   - Fallbacks:
     - If `diffuseCube` is missing but a specular env exists, `_diff` is aliased to `_spec`.
+  - `IBLManager::load_async(const IBLPaths&)` + `IBLManager::pump_async()` (runtime path used by the engine):
+    - `load_async` runs KTX2 file I/O and the SH bake on a worker thread and stores a prepared CPU-side description (`PreparedIBLData`).
+    - `pump_async` is called on the main thread once per frame (after the previous frame's GPU work has completed) to:
+      - Destroy old IBL images/SH via `destroy_images_and_sh()`.
+      - Create new GPU images with `create_image_compressed(_layers)` and upload the SH buffer.
+    - This avoids stalling the main/game loop when switching IBL volumes or loading the default environment at startup.
 - `IBLManager::unload()` releases GPU images, the SH buffer, and the descriptor set layout.
 - Descriptor layout:
   - `IBLManager::ensureLayout()` builds a descriptor set layout (set=3) with:
diff --git a/docs/TextureLoading.md b/docs/TextureLoading.md
index 404a888..27fe86b 100644
--- a/docs/TextureLoading.md
+++ b/docs/TextureLoading.md
@@ -101,7 +101,7 @@ Image‑Based Lighting (IBL) Textures
 - Specular:
   - If `specularCube` is a cubemap `.ktx2`, `IBLManager` uses `ktxutil::load_ktx2_cubemap` and uploads via `ResourceManager::create_image_compressed_layers`, preserving the file's format and mip chain.
   - If cubemap load fails, it falls back to 2D `.ktx2` via `ktxutil::load_ktx2_2d` + `ResourceManager::create_image_compressed`. The image is treated as equirectangular with prefiltered mips and sampled with explicit LOD in shaders.
-  - If the format is float HDR (`R16G16B16A16_SFLOAT` or `R32G32B32A32_SFLOAT`) and the aspect ratio is 2:1, `IBLManager` additionally computes 2nd‑order SH coefficients (9×`vec3`) on the CPU for diffuse irradiance and uploads them to a UBO (`_shBuffer`).
+  - If the format is float HDR (`R16G16B16A16_SFLOAT` or `R32G32B32A32_SFLOAT`) and the aspect ratio is 2:1, `IBLManager` additionally computes 2nd‑order SH coefficients (9×`vec3`) on a worker thread and uploads them to a UBO (`_shBuffer`) when `pump_async()` is called on the main thread.
 - Diffuse (optional):
   - If `diffuseCube` is provided and valid, it is uploaded as a cubemap using `create_image_compressed_layers`. Current shaders use the SH buffer for diffuse; this cubemap can be wired into a future path if you want to sample it directly.
 - BRDF LUT:
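Both documents describe the same runtime contract, so a compact usage sketch may help. This is illustrative only, using the `IBLManager` API introduced in this patch; the asset paths and the `iblManager` pointer are placeholders, and the call-site placement (after the previous frame's fence wait) mirrors `VulkanEngine::run()` below.

```cpp
// Illustrative sketch of the async IBL contract (not part of the patch).
IBLPaths paths{};
paths.specularCube = "ibl/docklands.ktx2"; // placeholder path
paths.brdfLut2D    = "ibl/brdf_lut.ktx2";  // placeholder path

iblManager->load_async(paths); // KTX2 I/O + SH bake run on the worker thread

// Once per frame, after the previous frame's GPU work has completed:
IBLManager::AsyncResult r = iblManager->pump_async();
if (r.completed && !r.success)
{
    // pump_async() returns before touching GPU state on failure, so the
    // previous environment keeps rendering until a later job succeeds.
}
```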
diff --git a/src/core/assets/ibl_manager.cpp b/src/core/assets/ibl_manager.cpp
index 9ee952a..ceafab2 100644
--- a/src/core/assets/ibl_manager.cpp
+++ b/src/core/assets/ibl_manager.cpp
@@ -8,291 +8,426 @@
 #include <array>
 #include <cmath>
 #include <cstring>
+#include <thread>
+#include <mutex>
+#include <condition_variable>
 #include "core/device/device.h"
 #include "core/assets/texture_cache.h"
 
+struct PreparedIBLData
+{
+    IBLPaths paths{};
+
+    bool has_spec{false};
+    bool spec_is_cubemap{false};
+    ktxutil::KtxCubemap spec_cubemap{};
+    ktxutil::Ktx2D spec_2d{};
+
+    bool has_diffuse{false};
+    ktxutil::KtxCubemap diff_cubemap{};
+
+    bool has_background{false};
+    ktxutil::Ktx2D background_2d{};
+
+    bool has_brdf{false};
+    ktxutil::Ktx2D brdf_2d{};
+
+    bool has_sh{false};
+    glm::vec4 sh[9]{};
+};
+
+namespace
+{
+    static bool compute_sh_from_ktx2_equirect(const char *path, glm::vec4 out_sh[9])
+    {
+        if (path == nullptr) return false;
+
+        ktxTexture2 *ktex = nullptr;
+        if (ktxTexture2_CreateFromNamedFile(path, KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, &ktex) != KTX_SUCCESS || !ktex)
+        {
+            return false;
+        }
+
+        bool ok = false;
+        const VkFormat fmt = static_cast<VkFormat>(ktex->vkFormat);
+        const bool isFloat16 = fmt == VK_FORMAT_R16G16B16A16_SFLOAT;
+        const bool isFloat32 = fmt == VK_FORMAT_R32G32B32A32_SFLOAT;
+        if (!ktxTexture2_NeedsTranscoding(ktex) && (isFloat16 || isFloat32) && ktex->baseWidth == 2 * ktex->baseHeight)
+        {
+            const uint32_t W = ktex->baseWidth;
+            const uint32_t H = ktex->baseHeight;
+            const uint8_t *dataPtr = reinterpret_cast<const uint8_t *>(ktxTexture_GetData(ktxTexture(ktex)));
+
+            struct Vec3
+            {
+                float x, y, z;
+            };
+
+            auto half_to_float = [](uint16_t h) -> float {
+                uint16_t h_exp = (h & 0x7C00u) >> 10;
+                uint16_t h_sig = h & 0x03FFu;
+                uint32_t sign = (h & 0x8000u) << 16;
+                uint32_t f_e, f_sig;
+                if (h_exp == 0)
+                {
+                    if (h_sig == 0)
+                    {
+                        f_e = 0;
+                        f_sig = 0;
+                    }
+                    else
+                    {
+                        int e = -1;
+                        uint16_t sig = h_sig;
+                        while ((sig & 0x0400u) == 0)
+                        {
+                            sig <<= 1;
+                            --e;
+                        }
+                        sig &= 0x03FFu;
+                        f_e = uint32_t(127 - 15 + e) << 23;
+                        f_sig = uint32_t(sig) << 13;
+                    }
+                }
+                else if (h_exp == 0x1Fu)
+                {
+                    f_e = 0xFFu << 23;
+                    f_sig = uint32_t(h_sig) << 13;
+                }
+                else
+                {
+                    f_e = uint32_t(h_exp - 15 + 127) << 23;
+                    f_sig = uint32_t(h_sig) << 13;
+                }
+                uint32_t f = sign | f_e | f_sig;
+                float out;
+                std::memcpy(&out, &f, 4);
+                return out;
+            };
+
+            auto sample_at = [&](uint32_t x, uint32_t y) -> Vec3 {
+                if (isFloat32)
+                {
+                    const float *px = reinterpret_cast<const float *>(dataPtr) + 4ull * (y * W + x);
+                    return {px[0], px[1], px[2]};
+                }
+                else
+                {
+                    const uint16_t *px = reinterpret_cast<const uint16_t *>(dataPtr) + 4ull * (y * W + x);
+                    return {half_to_float(px[0]), half_to_float(px[1]), half_to_float(px[2])};
+                }
+            };
+
+            const float dtheta = float(M_PI) / float(H);
+            const float dphi = 2.f * float(M_PI) / float(W);
+
+            std::array<glm::vec3, 9> c{};
+            for (auto &v : c) v = glm::vec3(0.0f);
+
+            auto sh_basis = [](const glm::vec3 &d) -> std::array<float, 9> {
+                const float x = d.x, y = d.y, z = d.z;
+                const float c0 = 0.2820947918f;
+                const float c1 = 0.4886025119f;
+                const float c2 = 1.0925484306f;
+                const float c3 = 0.3153915653f;
+                const float c4 = 0.5462742153f;
+                return {
+                    c0,
+                    c1 * y,
+                    c1 * z,
+                    c1 * x,
+                    c2 * x * y,
+                    c2 * y * z,
+                    c3 * (3.f * z * z - 1.f),
+                    c2 * x * z,
+                    c4 * (x * x - y * y)
+                };
+            };
+
+            for (uint32_t y = 0; y < H; ++y)
+            {
+                float theta = (y + 0.5f) * dtheta;
+                float sinT = std::sin(theta);
+                for (uint32_t x = 0; x < W; ++x)
+                {
+                    float phi = (x + 0.5f) * dphi;
+                    glm::vec3 dir = glm::vec3(std::cos(phi) * sinT, std::cos(theta), std::sin(phi) * sinT);
+                    auto Lrgb = sample_at(x, y);
+                    glm::vec3 Lvec(Lrgb.x, Lrgb.y, Lrgb.z);
+                    auto Y = sh_basis(dir);
+                    float dOmega = dphi * dtheta * sinT;
+                    for (int i = 0; i < 9; ++i)
+                    {
+                        c[i] += Lvec * (Y[i] * dOmega);
+                    }
+                }
+            }
+
+            const float A0 = float(M_PI);
+            const float A1 = 2.f * float(M_PI) / 3.f;
+            const float A2 = float(M_PI) / 4.f;
+            const float Aband[3] = {A0, A1, A2};
+            for (int i = 0; i < 9; ++i)
+            {
+                int band = (i == 0) ? 0 : (i < 4 ? 1 : 2);
+                c[i] *= Aband[band];
+                out_sh[i] = glm::vec4(c[i], 0.0f);
+            }
+
+            ok = true;
+        }
+
+        ktxTexture_Destroy(ktxTexture(ktex));
+        return ok;
+    }
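Since the Lambert band factors are folded into the stored coefficients above, reconstructing irradiance for a normal is a single dot product against the same nine basis functions. A minimal CPU-side sketch of that evaluation (illustrative, not part of the patch; the shaders are expected to do the equivalent with the `_shBuffer` contents):

```cpp
#include <glm/glm.hpp>

// Evaluate diffuse irradiance from the 9 coefficients produced by
// compute_sh_from_ktx2_equirect (band scales A0..A2 already applied).
glm::vec3 sh_irradiance(const glm::vec4 sh[9], const glm::vec3 &n)
{
    const float x = n.x, y = n.y, z = n.z;
    const float Y[9] = {
        0.2820947918f,                       // Y00
        0.4886025119f * y,                   // Y1-1
        0.4886025119f * z,                   // Y10
        0.4886025119f * x,                   // Y11
        1.0925484306f * x * y,               // Y2-2
        1.0925484306f * y * z,               // Y2-1
        0.3153915653f * (3.f * z * z - 1.f), // Y20
        1.0925484306f * x * z,               // Y21
        0.5462742153f * (x * x - y * y),     // Y22
    };
    glm::vec3 e(0.0f);
    for (int i = 0; i < 9; ++i)
        e += glm::vec3(sh[i]) * Y[i];
    return e; // divide by pi for a Lambertian BRDF's outgoing radiance
}
```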
+
+    static bool prepare_ibl_cpu(const IBLPaths &paths, PreparedIBLData &outData, std::string &outError)
+    {
+        outData = PreparedIBLData{};
+        outData.paths = paths;
+        outError.clear();
+
+        if (!paths.specularCube.empty())
+        {
+            ktxutil::KtxCubemap cube{};
+            if (ktxutil::load_ktx2_cubemap(paths.specularCube.c_str(), cube))
+            {
+                outData.has_spec = true;
+                outData.spec_is_cubemap = true;
+                outData.spec_cubemap = std::move(cube);
+            }
+            else
+            {
+                ktxutil::Ktx2D k2d{};
+                if (ktxutil::load_ktx2_2d(paths.specularCube.c_str(), k2d))
+                {
+                    outData.has_spec = true;
+                    outData.spec_is_cubemap = false;
+                    outData.spec_2d = std::move(k2d);
+
+                    glm::vec4 sh[9]{};
+                    if (compute_sh_from_ktx2_equirect(paths.specularCube.c_str(), sh))
+                    {
+                        outData.has_sh = true;
+                        for (int i = 0; i < 9; ++i)
+                        {
+                            outData.sh[i] = sh[i];
+                        }
+                    }
+                }
+                else
+                {
+                    outError = "Failed to load specular IBL as cubemap or 2D KTX2";
+                }
+            }
+        }
+
+        if (!paths.diffuseCube.empty())
+        {
+            ktxutil::KtxCubemap diff{};
+            if (ktxutil::load_ktx2_cubemap(paths.diffuseCube.c_str(), diff))
+            {
+                outData.has_diffuse = true;
+                outData.diff_cubemap = std::move(diff);
+            }
+        }
+
+        if (!paths.background2D.empty())
+        {
+            ktxutil::Ktx2D bg{};
+            if (ktxutil::load_ktx2_2d(paths.background2D.c_str(), bg))
+            {
+                outData.has_background = true;
+                outData.background_2d = std::move(bg);
+            }
+        }
+
+        if (!paths.brdfLut2D.empty())
+        {
+            ktxutil::Ktx2D lut{};
+            if (ktxutil::load_ktx2_2d(paths.brdfLut2D.c_str(), lut))
+            {
+                outData.has_brdf = true;
+                outData.brdf_2d = std::move(lut);
+            }
+        }
+
+        // Success is defined by having a specular environment; diffuse/background/BRDF are optional.
+        if (!outData.has_spec)
+        {
+            if (outError.empty())
+            {
+                outError = "Specular IBL KTX2 not found or invalid";
+            }
+            return false;
+        }
+        return true;
+    }
+}
+
+struct IBLManager::AsyncStateData
+{
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool shutdown{false};
+
+    bool requestPending{false};
+    IBLPaths requestPaths{};
+    uint64_t requestId{0};
+
+    bool resultReady{false};
+    bool resultSuccess{false};
+    PreparedIBLData readyData{};
+    std::string lastError;
+    uint64_t resultId{0};
+
+    std::thread worker;
+};
+
+IBLManager::~IBLManager()
+{
+    shutdown_async();
+}
+
+void IBLManager::init(EngineContext *ctx)
+{
+    _ctx = ctx;
+
+    if (_async != nullptr)
+    {
+        return;
+    }
+
+    _async = new AsyncStateData();
+    AsyncStateData *state = _async;
+
+    state->worker = std::thread([this, state]() {
+        for (;;)
+        {
+            IBLPaths paths{};
+            uint64_t jobId = 0;
+            {
+                std::unique_lock lock(state->mutex);
+                state->cv.wait(lock, [state]() { return state->shutdown || state->requestPending; });
+                if (state->shutdown)
+                {
+                    break;
+                }
+                paths = state->requestPaths;
+                jobId = state->requestId;
+                state->requestPending = false;
+            }
+
+            PreparedIBLData data{};
+            std::string error;
+            bool ok = prepare_ibl_cpu(paths, data, error);
+
+            {
+                std::lock_guard lock(state->mutex);
+                if (state->shutdown)
+                {
+                    break;
+                }
+                // Drop results for superseded jobs.
+                if (jobId != state->requestId)
+                {
+                    continue;
+                }
+
+                state->readyData = std::move(data);
+                state->lastError = std::move(error);
+                state->resultSuccess = ok;
+                state->resultReady = true;
+                state->resultId = jobId;
+            }
+        }
+    });
+}
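The worker loop above is a single-slot, latest-wins queue: `requestId` stamps each job, the worker re-checks the stamp before publishing, and `load_async` (below) clears `resultReady` so a stale result can never be committed. The same pattern in isolation, as a small runnable sketch (names are mine, not the engine's):

```cpp
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <thread>

// Single-slot, latest-wins job queue: only the newest request's result
// is ever published; superseded results are dropped by the id check.
struct LatestWins
{
    std::mutex m;
    std::condition_variable cv;
    bool shutdown = false, pending = false, ready = false;
    uint64_t requestId = 0;
    int request = 0, result = 0;
};

int main()
{
    LatestWins s;
    std::thread worker([&] {
        for (;;)
        {
            int job = 0;
            uint64_t id = 0;
            {
                std::unique_lock lk(s.m);
                s.cv.wait(lk, [&] { return s.shutdown || s.pending; });
                if (s.shutdown) return;
                job = s.request;
                id = s.requestId;
                s.pending = false;
            }
            int out = job * job; // "expensive" work runs outside the lock
            std::lock_guard lk(s.m);
            if (id != s.requestId) continue; // superseded while working: drop
            s.result = out;
            s.ready = true;
        }
    });

    for (int i = 1; i <= 3; ++i) // rapid submissions coalesce; only the last must win
    {
        std::lock_guard lk(s.m);
        s.request = i;
        ++s.requestId;
        s.pending = true;
        s.ready = false; // invalidate any stale result, like load_async()
        s.cv.notify_one();
    }

    // A real consumer would poll `ready` under the lock, like pump_async().
    {
        std::lock_guard lk(s.m);
        s.shutdown = true;
    }
    s.cv.notify_all();
    worker.join();
    return 0;
}
```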
 bool IBLManager::load(const IBLPaths &paths)
 {
     if (_ctx == nullptr || _ctx->getResources() == nullptr) return false;
-    ResourceManager *rm = _ctx->getResources();
-    // When uploads are deferred into the RenderGraph, any previously queued
-    // image uploads might still reference VkImage handles owned by this
-    // manager. Before destroying or recreating IBL images, flush those
-    // uploads via the immediate path so we never record barriers or copies
-    // for images that have been destroyed.
-    if (rm->deferred_uploads() && rm->has_pending_uploads())
+    PreparedIBLData data{};
+    std::string error;
+    if (!prepare_ibl_cpu(paths, data, error))
     {
-        rm->process_queued_uploads_immediate();
-    }
-
-    // Allow reloading at runtime: destroy previous images/SH but keep layout.
-    destroy_images_and_sh();
-    ensureLayout();
-
-    // Load specular environment: prefer cubemap; fallback to 2D equirect with mips.
-    // Also hint the TextureCache (if present) so future switches are cheap.
-    if (!paths.specularCube.empty())
-    {
-        // Try as cubemap first
-        ktxutil::KtxCubemap kcm{};
-        if (ktxutil::load_ktx2_cubemap(paths.specularCube.c_str(), kcm))
+        if (!error.empty())
         {
-            _spec = rm->create_image_compressed_layers(
-                kcm.bytes.data(), kcm.bytes.size(),
-                kcm.fmt, kcm.mipLevels, kcm.layers,
-                kcm.copies,
-                VK_IMAGE_USAGE_SAMPLED_BIT,
-                kcm.imgFlags
-            );
+            fmt::println("[IBL] load failed: {}", error);
         }
-        else
+        return false;
+    }
+
+    return commit_prepared(data);
+}
+
+bool IBLManager::load_async(const IBLPaths &paths)
+{
+    if (_ctx == nullptr || _ctx->getResources() == nullptr)
+    {
+        return false;
+    }
+
+    if (_async == nullptr)
+    {
+        init(_ctx);
+    }
+
+    AsyncStateData *state = _async;
+    {
+        std::lock_guard lock(state->mutex);
+        state->requestPaths = paths;
+        state->requestPending = true;
+        state->requestId++;
+        // Invalidate any previous ready result; it will be superseded by this job.
+        state->resultReady = false;
+    }
+    state->cv.notify_one();
+    return true;
+}
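One consequence of the `requestId++` / `resultReady = false` pair in `load_async` is that back-to-back requests coalesce, so callers never need to debounce. Illustrative only (the `volumeA`/`volumeB` objects are hypothetical):

```cpp
ibl->load_async(volumeA.paths); // may be superseded before it finishes
ibl->load_async(volumeB.paths); // bumps requestId; A's result will be dropped

// Later, on the main thread: commits B's images once ready, never A's.
IBLManager::AsyncResult r = ibl->pump_async();
```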
+IBLManager::AsyncResult IBLManager::pump_async()
+{
+    AsyncResult out{};
+
+    if (_async == nullptr || _ctx == nullptr || _ctx->getResources() == nullptr)
+    {
+        return out;
+    }
+
+    AsyncStateData *state = _async;
+
+    PreparedIBLData data{};
+    bool success = false;
+    {
+        std::lock_guard lock(state->mutex);
+        if (!state->resultReady)
         {
-            ktxutil::Ktx2D k2d{};
-            if (ktxutil::load_ktx2_2d(paths.specularCube.c_str(), k2d))
-            {
-                std::vector<ResourceManager::MipLevelCopy> lv;
-                lv.reserve(k2d.mipLevels);
-                for (uint32_t mip = 0; mip < k2d.mipLevels; ++mip)
-                {
-                    const auto &r = k2d.copies[mip];
-                    lv.push_back(ResourceManager::MipLevelCopy{
-                        .offset = r.bufferOffset,
-                        .length = 0,
-                        .width = r.imageExtent.width,
-                        .height = r.imageExtent.height,
-                    });
-                }
-                _spec = rm->create_image_compressed(k2d.bytes.data(), k2d.bytes.size(), k2d.fmt, lv,
-                                                    VK_IMAGE_USAGE_SAMPLED_BIT);
-
-                ktxTexture2 *ktex = nullptr;
-                if (ktxTexture2_CreateFromNamedFile(paths.specularCube.c_str(), KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT,
-                                                    &ktex) == KTX_SUCCESS && ktex)
-                {
-                    const VkFormat fmt = static_cast<VkFormat>(ktex->vkFormat);
-                    const bool isFloat16 = fmt == VK_FORMAT_R16G16B16A16_SFLOAT;
-                    const bool isFloat32 = fmt == VK_FORMAT_R32G32B32A32_SFLOAT;
-                    if (!ktxTexture2_NeedsTranscoding(ktex) && (isFloat16 || isFloat32) && ktex->baseWidth == 2 * ktex->
-                        baseHeight)
-                    {
-                        const uint32_t W = ktex->baseWidth;
-                        const uint32_t H = ktex->baseHeight;
-                        const uint8_t *dataPtr = reinterpret_cast<const uint8_t *>(
-                            ktxTexture_GetData(ktxTexture(ktex)));
-
-                        // Compute 9 SH coefficients (irradiance) from equirect HDR
-                        struct Vec3
-                        {
-                            float x, y, z;
-                        };
-                        auto half_to_float = [](uint16_t h)-> float {
-                            uint16_t h_exp = (h & 0x7C00u) >> 10;
-                            uint16_t h_sig = h & 0x03FFu;
-                            uint32_t sign = (h & 0x8000u) << 16;
-                            uint32_t f_e, f_sig;
-                            if (h_exp == 0)
-                            {
-                                if (h_sig == 0)
-                                {
-                                    f_e = 0;
-                                    f_sig = 0;
-                                }
-                                else
-                                {
-                                    // subnormals
-                                    int e = -1;
-                                    uint16_t sig = h_sig;
-                                    while ((sig & 0x0400u) == 0)
-                                    {
-                                        sig <<= 1;
-                                        --e;
-                                    }
-                                    sig &= 0x03FFu;
-                                    f_e = uint32_t(127 - 15 + e) << 23;
-                                    f_sig = uint32_t(sig) << 13;
-                                }
-                            }
-                            else if (h_exp == 0x1Fu)
-                            {
-                                f_e = 0xFFu << 23;
-                                f_sig = uint32_t(h_sig) << 13;
-                            }
-                            else
-                            {
-                                f_e = uint32_t(h_exp - 15 + 127) << 23;
-                                f_sig = uint32_t(h_sig) << 13;
-                            }
-                            uint32_t f = sign | f_e | f_sig;
-                            float out;
-                            std::memcpy(&out, &f, 4);
-                            return out;
-                        };
-
-                        auto sample_at = [&](uint32_t x, uint32_t y)-> Vec3 {
-                            if (isFloat32)
-                            {
-                                const float *px = reinterpret_cast<const float *>(dataPtr) + 4ull * (y * W + x);
-                                return {px[0], px[1], px[2]};
-                            }
-                            else
-                            {
-                                const uint16_t *px = reinterpret_cast<const uint16_t *>(dataPtr) + 4ull * (y * W + x);
-                                return {half_to_float(px[0]), half_to_float(px[1]), half_to_float(px[2])};
-                            }
-                        };
-
-                        constexpr int L = 2; // 2nd order (9 coeffs)
-                        const float dtheta = float(M_PI) / float(H);
-                        const float dphi = 2.f * float(M_PI) / float(W);
-                        // Accumulate RGB SH coeffs
-                        std::array<glm::vec3, 9> c{};
-                        for (auto &v: c) v = glm::vec3(0);
-
-                        auto sh_basis = [](const glm::vec3 &d)-> std::array<float, 9> {
-                            const float x = d.x, y = d.y, z = d.z;
-                            // Real SH, unnormalized constants
-                            const float c0 = 0.2820947918f;
-                            const float c1 = 0.4886025119f;
-                            const float c2 = 1.0925484306f;
-                            const float c3 = 0.3153915653f;
-                            const float c4 = 0.5462742153f;
-                            return {
-                                c0,
-                                c1 * y,
-                                c1 * z,
-                                c1 * x,
-                                c2 * x * y,
-                                c2 * y * z,
-                                c3 * (3.f * z * z - 1.f),
-                                c2 * x * z,
-                                c4 * (x * x - y * y)
-                            };
-                        };
-
-                        for (uint32_t y = 0; y < H; ++y)
-                        {
-                            float theta = (y + 0.5f) * dtheta; // [0,pi]
-                            float sinT = std::sin(theta);
-                            for (uint32_t x = 0; x < W; ++x)
-                            {
-                                float phi = (x + 0.5f) * dphi; // [0,2pi]
-                                glm::vec3 dir = glm::vec3(std::cos(phi) * sinT, std::cos(theta), std::sin(phi) * sinT);
-                                auto Lrgb = sample_at(x, y);
-                                glm::vec3 Lvec(Lrgb.x, Lrgb.y, Lrgb.z);
-                                auto Y = sh_basis(dir);
-                                float dOmega = dphi * dtheta * sinT; // solid angle per pixel
-                                for (int i = 0; i < 9; ++i)
-                                {
-                                    c[i] += Lvec * (Y[i] * dOmega);
-                                }
-                            }
-                        }
-                        // Convolve with Lambert kernel via per-band scale
-                        const float A0 = float(M_PI);
-                        const float A1 = 2.f * float(M_PI) / 3.f;
-                        const float A2 = float(M_PI) / 4.f;
-                        const float Aband[3] = {A0, A1, A2};
-                        for (int i = 0; i < 9; ++i)
-                        {
-                            int band = (i == 0) ? 0 : (i < 4 ? 1 : 2);
-                            c[i] *= Aband[band];
-                        }
-
-                        _shBuffer = rm->create_buffer(sizeof(glm::vec4) * 9, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
-                                                      VMA_MEMORY_USAGE_CPU_TO_GPU);
-                        for (int i = 0; i < 9; ++i)
-                        {
-                            glm::vec4 v(c[i], 0.0f);
-                            std::memcpy(reinterpret_cast<uint8_t *>(_shBuffer.info.pMappedData) + i * sizeof(glm::vec4),
-                                        &v, sizeof(glm::vec4));
-                        }
-                        vmaFlushAllocation(_ctx->getDevice()->allocator(), _shBuffer.allocation, 0,
-                                           sizeof(glm::vec4) * 9);
-                    }
-                    ktxTexture_Destroy(ktxTexture(ktex));
-                }
-            }
+            return out;
         }
+        data = std::move(state->readyData);
+        success = state->resultSuccess;
+        state->resultReady = false;
     }
 
-    // Diffuse cubemap (optional; if missing, reuse specular)
-    if (!paths.diffuseCube.empty())
+    out.completed = true;
+    if (!success)
     {
-        ktxutil::KtxCubemap kcm{};
-        if (ktxutil::load_ktx2_cubemap(paths.diffuseCube.c_str(), kcm))
-        {
-            _diff = rm->create_image_compressed_layers(
-                kcm.bytes.data(), kcm.bytes.size(),
-                kcm.fmt, kcm.mipLevels, kcm.layers,
-                kcm.copies,
-                VK_IMAGE_USAGE_SAMPLED_BIT,
-                kcm.imgFlags
-            );
-        }
-    }
-    if (_diff.image == VK_NULL_HANDLE && _spec.image != VK_NULL_HANDLE)
-    {
-        _diff = _spec;
+        out.success = false;
+        return out;
     }
 
-    if (!paths.background2D.empty())
-    {
-        ktxutil::Ktx2D bg{};
-        if (ktxutil::load_ktx2_2d(paths.background2D.c_str(), bg))
-        {
-            std::vector<ResourceManager::MipLevelCopy> lv;
-            lv.reserve(bg.mipLevels);
-            for (uint32_t mip = 0; mip < bg.mipLevels; ++mip)
-            {
-                const auto &r = bg.copies[mip];
-                lv.push_back(ResourceManager::MipLevelCopy{
-                    .offset = r.bufferOffset,
-                    .length = 0,
-                    .width = r.imageExtent.width,
-                    .height = r.imageExtent.height,
-                });
-            }
-            _background = rm->create_image_compressed(
-                bg.bytes.data(), bg.bytes.size(), bg.fmt, lv,
-                VK_IMAGE_USAGE_SAMPLED_BIT);
-        }
-    }
-
-    if (_background.image == VK_NULL_HANDLE && _spec.image != VK_NULL_HANDLE)
-    {
-        _background = _spec;
-    }
-
-    // BRDF LUT
-    if (!paths.brdfLut2D.empty())
-    {
-        ktxutil::Ktx2D lut{};
-        if (ktxutil::load_ktx2_2d(paths.brdfLut2D.c_str(), lut))
-        {
-            std::vector<ResourceManager::MipLevelCopy> lv;
-            lv.reserve(lut.mipLevels);
-            for (uint32_t mip = 0; mip < lut.mipLevels; ++mip)
-            {
-                const auto &r = lut.copies[mip];
-                lv.push_back(ResourceManager::MipLevelCopy{
-                    .offset = r.bufferOffset,
-                    .length = 0,
-                    .width = r.imageExtent.width,
-                    .height = r.imageExtent.height,
-                });
-            }
-            _brdf = rm->create_image_compressed(lut.bytes.data(), lut.bytes.size(), lut.fmt, lv,
-                                                VK_IMAGE_USAGE_SAMPLED_BIT);
-        }
-    }
-
-    return (_spec.image != VK_NULL_HANDLE) && (_diff.image != VK_NULL_HANDLE);
+    // Commit GPU resources on the main thread.
+    out.success = commit_prepared(data);
+    return out;
 }
 
 void IBLManager::unload()
 {
+    shutdown_async();
+
     if (_ctx == nullptr || _ctx->getResources() == nullptr) return;
     // Destroy images and SH buffer first.
@@ -363,3 +498,150 @@ void IBLManager::destroy_images_and_sh()
     _background = {};
     _brdf = {};
 }
+
+void IBLManager::shutdown_async()
+{
+    if (_async == nullptr) return;
+
+    AsyncStateData *state = _async;
+    {
+        std::lock_guard lock(state->mutex);
+        state->shutdown = true;
+        state->requestPending = false;
+    }
+    state->cv.notify_all();
+    if (state->worker.joinable())
+    {
+        state->worker.join();
+    }
+
+    delete _async;
+    _async = nullptr;
+}
+
+bool IBLManager::commit_prepared(const PreparedIBLData &data)
+{
+    if (_ctx == nullptr || _ctx->getResources() == nullptr)
+    {
+        return false;
+    }
+
+    ResourceManager *rm = _ctx->getResources();
+
+    if (rm->deferred_uploads() && rm->has_pending_uploads())
+    {
+        rm->process_queued_uploads_immediate();
+    }
+
+    destroy_images_and_sh();
+    ensureLayout();
+
+    if (data.has_spec)
+    {
+        if (data.spec_is_cubemap)
+        {
+            const auto &kcm = data.spec_cubemap;
+            _spec = rm->create_image_compressed_layers(
+                kcm.bytes.data(), kcm.bytes.size(),
+                kcm.fmt, kcm.mipLevels, kcm.layers,
+                kcm.copies,
+                VK_IMAGE_USAGE_SAMPLED_BIT,
+                kcm.imgFlags);
+        }
+        else
+        {
+            const auto &k2d = data.spec_2d;
+            std::vector<ResourceManager::MipLevelCopy> lv;
+            lv.reserve(k2d.mipLevels);
+            for (uint32_t mip = 0; mip < k2d.mipLevels; ++mip)
+            {
+                const auto &r = k2d.copies[mip];
+                lv.push_back(ResourceManager::MipLevelCopy{
+                    .offset = r.bufferOffset,
+                    .length = 0,
+                    .width = r.imageExtent.width,
+                    .height = r.imageExtent.height,
+                });
+            }
+            _spec = rm->create_image_compressed(
+                k2d.bytes.data(), k2d.bytes.size(), k2d.fmt, lv,
+                VK_IMAGE_USAGE_SAMPLED_BIT);
+
+            if (data.has_sh)
+            {
+                _shBuffer = rm->create_buffer(sizeof(glm::vec4) * 9,
+                                              VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                                              VMA_MEMORY_USAGE_CPU_TO_GPU);
+                for (int i = 0; i < 9; ++i)
+                {
+                    std::memcpy(reinterpret_cast<uint8_t *>(_shBuffer.info.pMappedData) + i * sizeof(glm::vec4),
+                                &data.sh[i], sizeof(glm::vec4));
+                }
+                vmaFlushAllocation(_ctx->getDevice()->allocator(), _shBuffer.allocation, 0,
+                                   sizeof(glm::vec4) * 9);
+            }
+        }
+    }
+
+    if (data.has_diffuse)
+    {
+        const auto &kcm = data.diff_cubemap;
+        _diff = rm->create_image_compressed_layers(
+            kcm.bytes.data(), kcm.bytes.size(),
+            kcm.fmt, kcm.mipLevels, kcm.layers,
+            kcm.copies,
+            VK_IMAGE_USAGE_SAMPLED_BIT,
+            kcm.imgFlags);
+    }
+    if (_diff.image == VK_NULL_HANDLE && _spec.image != VK_NULL_HANDLE)
+    {
+        _diff = _spec;
+    }
+
+    if (data.has_background)
+    {
+        const auto &bg = data.background_2d;
+        std::vector<ResourceManager::MipLevelCopy> lv;
+        lv.reserve(bg.mipLevels);
+        for (uint32_t mip = 0; mip < bg.mipLevels; ++mip)
+        {
+            const auto &r = bg.copies[mip];
+            lv.push_back(ResourceManager::MipLevelCopy{
+                .offset = r.bufferOffset,
+                .length = 0,
+                .width = r.imageExtent.width,
+                .height = r.imageExtent.height,
+            });
+        }
+        _background = rm->create_image_compressed(
+            bg.bytes.data(), bg.bytes.size(), bg.fmt, lv,
+            VK_IMAGE_USAGE_SAMPLED_BIT);
+    }
+
+    if (_background.image == VK_NULL_HANDLE && _spec.image != VK_NULL_HANDLE)
+    {
+        _background = _spec;
+    }
+
+    if (data.has_brdf)
+    {
+        const auto &lut = data.brdf_2d;
+        std::vector<ResourceManager::MipLevelCopy> lv;
+        lv.reserve(lut.mipLevels);
+        for (uint32_t mip = 0; mip < lut.mipLevels; ++mip)
+        {
+            const auto &r = lut.copies[mip];
+            lv.push_back(ResourceManager::MipLevelCopy{
+                .offset = r.bufferOffset,
+                .length = 0,
+                .width = r.imageExtent.width,
+                .height = r.imageExtent.height,
+            });
+        }
+        _brdf = rm->create_image_compressed(
+            lut.bytes.data(), lut.bytes.size(), lut.fmt, lv,
+            VK_IMAGE_USAGE_SAMPLED_BIT);
+    }
+
+    return (_spec.image != VK_NULL_HANDLE) && (_diff.image != VK_NULL_HANDLE);
+}
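`commit_prepared` builds the same `MipLevelCopy` vector three times (specular 2D fallback, background, BRDF LUT). A possible cleanup, sketched with a hypothetical `make_mip_copies` helper that is not part of this patch (the `ResourceManager`/`ktxutil` types come from this repo):

```cpp
// Hypothetical helper (not in this patch): map a ktxutil 2D mip chain to
// the MipLevelCopy entries expected by ResourceManager::create_image_compressed.
static std::vector<ResourceManager::MipLevelCopy> make_mip_copies(const ktxutil::Ktx2D &tex)
{
    std::vector<ResourceManager::MipLevelCopy> lv;
    lv.reserve(tex.mipLevels);
    for (uint32_t mip = 0; mip < tex.mipLevels; ++mip)
    {
        const auto &r = tex.copies[mip];
        lv.push_back(ResourceManager::MipLevelCopy{
            .offset = r.bufferOffset,
            .length = 0, // matches the existing call sites above
            .width = r.imageExtent.width,
            .height = r.imageExtent.height,
        });
    }
    return lv;
}

// e.g.: _brdf = rm->create_image_compressed(lut.bytes.data(), lut.bytes.size(),
//                                           lut.fmt, make_mip_copies(lut),
//                                           VK_IMAGE_USAGE_SAMPLED_BIT);
```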
diff --git a/src/core/assets/ibl_manager.h b/src/core/assets/ibl_manager.h
index 0db490c..1eb72d7 100644
--- a/src/core/assets/ibl_manager.h
+++ b/src/core/assets/ibl_manager.h
@@ -7,6 +7,8 @@
 class TextureCache;
 class EngineContext;
 
+struct PreparedIBLData;
+
 struct IBLPaths
 {
     std::string specularCube; // .ktx2 (GPU-ready BC6H or R16G16B16A16)
@@ -20,13 +22,35 @@ struct IBLPaths
 class IBLManager
 {
 public:
-    void init(EngineContext *ctx) { _ctx = ctx; }
+    IBLManager() = default;
+    ~IBLManager();
+
+    void init(EngineContext *ctx);
     void set_texture_cache(TextureCache *cache) { _cache = cache; }
 
     // Load all three textures. Returns true when specular+diffuse (and optional LUT) are resident.
     bool load(const IBLPaths &paths);
 
+    // Asynchronous IBL load:
+    // - Performs KTX2 file I/O and the SH bake on a background thread.
+    // - GPU image creation and the SH upload are deferred to pump_async() on the main thread.
+    // Returns false if the job could not be queued.
+    bool load_async(const IBLPaths &paths);
+
+    struct AsyncResult
+    {
+        // True when an async job finished since the last pump_async() call.
+        bool completed{false};
+        // True when the finished job successfully produced new GPU IBL resources.
+        bool success{false};
+    };
+
+    // Main-thread integration: if a completed async job is pending, destroy the
+    // previous IBL images/SH and upload the new ones. Must be called only after
+    // the previous frame's GPU work has completed.
+    AsyncResult pump_async();
+
     // Release GPU memory and patch to fallbacks handled by the caller.
     void unload();
 
@@ -57,6 +81,13 @@ private:
     VkDescriptorSetLayout _iblSetLayout = VK_NULL_HANDLE;
     AllocatedBuffer _shBuffer{}; // 9*vec4 coefficients (RGB in .xyz)
 
+    struct AsyncStateData;
+    AsyncStateData *_async{nullptr};
+
+    bool commit_prepared(const PreparedIBLData &data);
+
     // Destroy current GPU images/SH buffer but keep descriptor layout alive.
     void destroy_images_and_sh();
+
+    void shutdown_async();
 };
diff --git a/src/core/engine.cpp b/src/core/engine.cpp
index 121cfca..232e047 100644
--- a/src/core/engine.cpp
+++ b/src/core/engine.cpp
@@ -250,7 +250,7 @@ void VulkanEngine::init()
     // Publish to context for passes and pipeline layout assembly
     _context->ibl = _iblManager.get();
 
-    // Try to load default IBL assets if present
+    // Try to load default IBL assets if present (async)
     {
         IBLPaths ibl{};
         ibl.specularCube = _assetManager->assetPath("ibl/docklands.ktx2");
@@ -262,13 +262,21 @@ void VulkanEngine::init()
         // Treat this as the global/fallback IBL used outside any local volume.
         _globalIBLPaths = ibl;
         _activeIBLVolume = -1;
-        bool ibl_ok = _iblManager->load(ibl);
-        _hasGlobalIBL = ibl_ok;
-        if (!ibl_ok)
+        _hasGlobalIBL = false;
+        if (_iblManager)
         {
-            fmt::println("[Engine] Warning: failed to load default IBL (specular='{}', brdfLut='{}'). IBL lighting will be disabled until a valid IBL is loaded.",
-                         ibl.specularCube,
-                         ibl.brdfLut2D);
+            if (_iblManager->load_async(ibl))
+            {
+                _pendingIBLRequest.active = true;
+                _pendingIBLRequest.targetVolume = -1;
+                _pendingIBLRequest.paths = ibl;
+            }
+            else
+            {
+                fmt::println("[Engine] Warning: failed to enqueue default IBL load (specular='{}', brdfLut='{}'). IBL lighting will be disabled until a valid IBL is loaded.",
+                             ibl.specularCube,
+                             ibl.brdfLut2D);
+            }
         }
     }
 
@@ -436,6 +444,56 @@ bool VulkanEngine::addGLTFInstance(const std::string &instanceName,
     return true;
 }
 
+bool VulkanEngine::addPrimitiveInstance(const std::string &instanceName,
+                                        AssetManager::MeshGeometryDesc::Type geomType,
+                                        const glm::mat4 &transform,
+                                        const AssetManager::MeshMaterialDesc &material,
+                                        std::optional boundsTypeOverride)
+{
+    if (!_assetManager || !_sceneManager)
+    {
+        return false;
+    }
+
+    // Build a cache key for the primitive mesh so multiple instances
+    // share the same GPU buffers.
+    std::string meshName;
+    switch (geomType)
+    {
+        case AssetManager::MeshGeometryDesc::Type::Cube:
+            meshName = "Primitive.Cube";
+            break;
+        case AssetManager::MeshGeometryDesc::Type::Sphere:
+            meshName = "Primitive.Sphere";
+            break;
+        case AssetManager::MeshGeometryDesc::Type::Plane:
+            meshName = "Primitive.Plane";
+            break;
+        case AssetManager::MeshGeometryDesc::Type::Capsule:
+            meshName = "Primitive.Capsule";
+            break;
+        case AssetManager::MeshGeometryDesc::Type::Provided:
+        default:
+            // Provided geometry requires explicit vertex/index data; not supported here.
+            return false;
+    }
+
+    AssetManager::MeshCreateInfo ci{};
+    ci.name = meshName;
+    ci.geometry.type = geomType;
+    ci.material = material;
+    ci.boundsType = boundsTypeOverride;
+
+    auto mesh = _assetManager->createMesh(ci);
+    if (!mesh)
+    {
+        return false;
+    }
+
+    _sceneManager->addMeshInstance(instanceName, mesh, transform, boundsTypeOverride);
+    return true;
+}
+
 uint32_t VulkanEngine::loadGLTFAsync(const std::string &sceneName,
                                      const std::string &modelRelativePath,
                                      const glm::mat4 &transform,
@@ -627,6 +685,7 @@ void VulkanEngine::draw()
                 break;
             }
         }
+
         if (newVolume != _activeIBLVolume)
         {
             const IBLPaths *paths = nullptr;
@@ -639,17 +698,25 @@ void VulkanEngine::draw()
                 paths = &_globalIBLPaths;
             }
 
-            if (paths)
+            // Avoid enqueueing duplicate jobs for the same target volume.
+            const bool alreadyPendingForTarget =
+                _pendingIBLRequest.active && _pendingIBLRequest.targetVolume == newVolume;
+
+            if (paths && !alreadyPendingForTarget)
             {
-                bool ibl_ok = _iblManager->load(*paths);
-                if (!ibl_ok)
+                if (_iblManager->load_async(*paths))
                 {
-                    fmt::println("[Engine] Warning: failed to load IBL for {} (specular='{}')",
+                    _pendingIBLRequest.active = true;
+                    _pendingIBLRequest.targetVolume = newVolume;
+                    _pendingIBLRequest.paths = *paths;
+                }
+                else
+                {
+                    fmt::println("[Engine] Warning: failed to enqueue IBL load for {} (specular='{}')",
                                  (newVolume >= 0) ? "volume" : "global environment",
                                  paths->specularCube);
                 }
             }
-            _activeIBLVolume = newVolume;
         }
     }
@@ -1118,6 +1185,33 @@ void VulkanEngine::run()
             // Safe to destroy any BLAS queued for deletion now that the previous frame is idle.
             if (_rayManager) { _rayManager->flushPendingDeletes(); }
 
+            // Commit any completed async IBL load now that the GPU is idle.
+            if (_iblManager && _pendingIBLRequest.active)
+            {
+                IBLManager::AsyncResult iblRes = _iblManager->pump_async();
+                if (iblRes.completed)
+                {
+                    if (iblRes.success)
+                    {
+                        if (_pendingIBLRequest.targetVolume >= 0)
+                        {
+                            _activeIBLVolume = _pendingIBLRequest.targetVolume;
+                        }
+                        else
+                        {
+                            _activeIBLVolume = -1;
+                            _hasGlobalIBL = true;
+                        }
+                    }
+                    else
+                    {
+                        fmt::println("[Engine] Warning: async IBL load failed (specular='{}')",
+                                     _pendingIBLRequest.paths.specularCube);
+                    }
+                    _pendingIBLRequest.active = false;
+                }
+            }
+
             if (_pickResultPending && _pickReadbackBuffer.buffer && _sceneManager)
             {
                 vmaInvalidateAllocation(_deviceManager->allocator(), _pickReadbackBuffer.allocation, 0, sizeof(uint32_t));
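A usage sketch for the new `addPrimitiveInstance` entry point (illustrative; the instance name and transform are arbitrary, and `glm::translate` needs `<glm/gtc/matrix_transform.hpp>`):

```cpp
#include <glm/gtc/matrix_transform.hpp>

// Spawn a sphere two units up; repeated calls reuse the cached
// "Primitive.Sphere" mesh, so instances share GPU buffers.
engine.addPrimitiveInstance(
    "DebugSphere.0",
    AssetManager::MeshGeometryDesc::Type::Sphere,
    glm::translate(glm::mat4(1.f), glm::vec3(0.f, 2.f, 0.f)));
// Default material and bounds; returns false for Type::Provided, which
// requires explicit vertex/index data.
```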
diff --git a/src/core/engine.h b/src/core/engine.h
index 6263b6d..ab8aa97 100644
--- a/src/core/engine.h
+++ b/src/core/engine.h
@@ -133,6 +133,13 @@ public:
     // User-defined local IBL volumes and currently active index (-1 = global).
     std::vector _iblVolumes;
     int _activeIBLVolume{-1};
+    // Pending async IBL request (global or volume). targetVolume = -1 means global.
+    struct PendingIBLRequest
+    {
+        bool active{false};
+        int targetVolume{-1};
+        IBLPaths paths{};
+    } _pendingIBLRequest;
 
     struct PickInfo
     {
@@ -206,6 +213,20 @@ public:
                          const glm::mat4 &transform = glm::mat4(1.f),
                          bool preloadTextures = false);
 
+    // Spawn a runtime primitive mesh instance (cube/sphere/plane/capsule).
+    // - instanceName is the unique key for this object in SceneManager.
+    // - geomType selects which analytic primitive to build.
+    // - material controls whether the primitive uses the default PBR material
+    //   or a textured material (see AssetManager::MeshMaterialDesc).
+    // - boundsTypeOverride can force a specific bounds type for picking.
+    // The underlying mesh is cached in AssetManager using a per-primitive name,
+    // so multiple instances share GPU buffers.
+    bool addPrimitiveInstance(const std::string &instanceName,
+                              AssetManager::MeshGeometryDesc::Type geomType,
+                              const glm::mat4 &transform = glm::mat4(1.f),
+                              const AssetManager::MeshMaterialDesc &material = {},
+                              std::optional boundsTypeOverride = {});
+
     // Asynchronous glTF load that reports progress via AsyncAssetLoader.
     // Returns a JobID that can be queried via AsyncAssetLoader.
     // If preloadTextures is true, textures will be immediately marked for loading to VRAM.
diff --git a/src/core/engine_ui.cpp b/src/core/engine_ui.cpp
index ec72605..b88b6cb 100644
--- a/src/core/engine_ui.cpp
+++ b/src/core/engine_ui.cpp
@@ -242,19 +242,27 @@ namespace
             {
                 if (eng->_iblManager && vol.enabled)
                 {
-                    eng->_iblManager->load(vol.paths);
-                    eng->_activeIBLVolume = static_cast<int>(i);
+                    if (eng->_iblManager->load_async(vol.paths))
+                    {
+                        eng->_pendingIBLRequest.active = true;
+                        eng->_pendingIBLRequest.targetVolume = static_cast<int>(i);
+                        eng->_pendingIBLRequest.paths = vol.paths;
+                    }
                 }
             }
             ImGui::SameLine();
             if (ImGui::Button("Set As Global IBL"))
             {
                 eng->_globalIBLPaths = vol.paths;
-                eng->_hasGlobalIBL = true;
-                eng->_activeIBLVolume = -1;
                 if (eng->_iblManager)
                 {
-                    eng->_iblManager->load(eng->_globalIBLPaths);
+                    if (eng->_iblManager->load_async(eng->_globalIBLPaths))
+                    {
+                        eng->_pendingIBLRequest.active = true;
+                        eng->_pendingIBLRequest.targetVolume = -1;
+                        eng->_pendingIBLRequest.paths = eng->_globalIBLPaths;
+                        eng->_hasGlobalIBL = false;
+                    }
                 }
             }