diff --git a/.gitignore b/.gitignore index d189575..602a342 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ /cmake-build-debug /cmake-build-debug_win /cmake-build-release_win -*.spv \ No newline at end of file +*.spv diff --git a/shaders/particles.frag b/shaders/particles.frag index 4fb3ca0..3329bc1 100644 --- a/shaders/particles.frag +++ b/shaders/particles.frag @@ -1,7 +1,27 @@ #version 450 +layout(set = 0, binding = 0) uniform SceneData +{ + mat4 view; + mat4 proj; + mat4 viewproj; +} sceneData; + layout(location = 0) in vec4 v_color; layout(location = 1) in vec2 v_uv; +layout(location = 2) in float v_view_depth; +layout(location = 3) in float v_seed; + +layout(set = 2, binding = 0) uniform sampler2D gbufPosTex; +layout(set = 3, binding = 0) uniform sampler2D flipbookTex; +layout(set = 3, binding = 1) uniform sampler2D noiseTex; + +layout(push_constant) uniform ParticlePush +{ + vec4 screen; // x=invW, y=invH, z=softDepthDistance, w=timeSeconds + vec4 flipbook; // x=cols, y=rows, z=fps, w=intensity + vec4 noise; // x=scale, y=strength, z=scrollX, w=scrollY +} pc; layout(location = 0) out vec4 outColor; @@ -16,6 +36,63 @@ void main() c.rgb *= mask; c.a *= mask; + // Flipbook sampling + noise UV distortion (atlas). + float cols = max(pc.flipbook.x, 1.0); + float rows = max(pc.flipbook.y, 1.0); + float frames = cols * rows; + float fps = max(pc.flipbook.z, 0.0); + float intensity = max(pc.flipbook.w, 0.0); + + vec2 uv = v_uv; + float noiseScale = max(pc.noise.x, 0.0); + float noiseStrength = max(pc.noise.y, 0.0); + if (noiseScale > 0.0 && noiseStrength > 0.0) + { + vec2 nUV = uv * noiseScale + pc.screen.w * pc.noise.zw; + vec2 n = texture(noiseTex, nUV).rg * 2.0 - 1.0; + vec2 cell = vec2(1.0 / cols, 1.0 / rows); + uv = clamp(uv + n * noiseStrength * cell, 0.0, 1.0); + } + + uint frame = 0u; + if (frames > 0.5 && fps > 0.0) + { + float ff = pc.screen.w * fps + v_seed * frames; + frame = uint(ff) % uint(frames); + } + uint cols_u = uint(cols); + uint rows_u = uint(rows); + uint fx = frame % cols_u; + // Flipbook sheets are usually laid out with row 0 at the top. + uint fy = (rows_u > 0u) ? (rows_u - 1u - (frame / cols_u)) : 0u; + + vec2 cell = vec2(1.0 / cols, 1.0 / rows); + vec2 atlas_uv = uv * cell + vec2(float(fx), float(fy)) * cell; + + vec3 flip = texture(flipbookTex, atlas_uv).rgb; + // BC6H has no alpha; approximate mask from luminance. + float flip_a = clamp(dot(flip, vec3(0.2126, 0.7152, 0.0722)), 0.0, 1.0); + + c.rgb *= flip * intensity; + c.a *= flip_a; + + // Soft particles: fade out near opaque geometry intersections. + float soft = 1.0; + float softDist = pc.screen.z; + if (softDist > 0.0) + { + vec2 suv = gl_FragCoord.xy * pc.screen.xy; + vec4 scenePos = texture(gbufPosTex, suv); + if (scenePos.w > 0.0) + { + float sceneDepth = -(sceneData.view * vec4(scenePos.xyz, 1.0)).z; + float delta = sceneDepth - v_view_depth; + soft = clamp(delta / softDist, 0.0, 1.0); + } + } + c.rgb *= soft; + c.a *= soft; + if (c.a <= 0.001) { discard; @@ -23,4 +100,3 @@ void main() outColor = c; } - diff --git a/shaders/particles.vert b/shaders/particles.vert index 49d2b6c..8302ac2 100644 --- a/shaders/particles.vert +++ b/shaders/particles.vert @@ -20,8 +20,15 @@ layout(std430, set = 1, binding = 0) readonly buffer ParticlePool Particle particles[]; } pool; +layout(std430, set = 1, binding = 1) readonly buffer DrawIndices +{ + uint indices[]; +} drawIndices; + layout(location = 0) out vec4 v_color; layout(location = 1) out vec2 v_uv; +layout(location = 2) out float v_view_depth; +layout(location = 3) out float v_seed; vec2 quad_corner(uint vidx) { @@ -39,7 +46,7 @@ vec2 quad_corner(uint vidx) void main() { - uint particle_index = gl_InstanceIndex; + uint particle_index = drawIndices.indices[uint(gl_InstanceIndex)]; Particle p = pool.particles[particle_index]; float life = max(p.vel_life.w, 1e-6); @@ -61,6 +68,7 @@ void main() vec3 pos = p.pos_age.xyz + (cam_right * corner.x + cam_up * corner.y) * size; v_color = vec4(p.color.rgb * fade, p.color.a * fade); + v_view_depth = -(sceneData.view * vec4(pos, 1.0)).z; + v_seed = p.misc.y; gl_Position = sceneData.viewproj * vec4(pos, 1.0); } - diff --git a/shaders/particles_build_indices.comp b/shaders/particles_build_indices.comp new file mode 100644 index 0000000..f575730 --- /dev/null +++ b/shaders/particles_build_indices.comp @@ -0,0 +1,46 @@ +#version 450 + +// Output is a global indices[] buffer, indexed by gl_InstanceIndex. + +layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; + +layout(std430, set = 0, binding = 0) readonly buffer SortedBlocks +{ + uint blocks[]; +} sortedBlocks; + +layout(std430, set = 0, binding = 1) writeonly buffer DrawIndices +{ + uint indices[]; +} outIndices; + +layout(push_constant) uniform Push +{ + uvec4 header; // x=base, y=count, z=flags (bit0=identity) +} pc; + +const uint BLOCK_SIZE = 256u; + +void main() +{ + uint i = gl_GlobalInvocationID.x; + uint count = pc.header.y; + if (i >= count) return; + + uint base = pc.header.x; + uint flags = pc.header.z; + + uint outIdx = base + i; + uint particleIdx = base + i; + + if ((flags & 1u) == 0u) + { + uint blockRank = i / BLOCK_SIZE; + uint within = i - blockRank * BLOCK_SIZE; + uint block = sortedBlocks.blocks[blockRank]; + particleIdx = base + block * BLOCK_SIZE + within; + } + + outIndices.indices[outIdx] = particleIdx; +} + diff --git a/shaders/particles_sort_blocks.comp b/shaders/particles_sort_blocks.comp new file mode 100644 index 0000000..2b70249 --- /dev/null +++ b/shaders/particles_sort_blocks.comp @@ -0,0 +1,101 @@ +#version 450 + +// Each system dispatch sorts up to 512 blocks of 256 particles by max view-space depth. + +layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; + +struct Particle +{ + vec4 pos_age; + vec4 vel_life; + vec4 color; + vec4 misc; +}; + +layout(std430, set = 0, binding = 0) readonly buffer ParticlePool +{ + Particle particles[]; +} pool; + +layout(std430, set = 0, binding = 1) writeonly buffer SortedBlocks +{ + uint blocks[]; +} outBlocks; + +layout(push_constant) uniform Push +{ + uvec4 header; // x=base, y=count + mat4 view; +} pc; + +const uint BLOCK_SIZE = 256u; +const uint MAX_BLOCKS = 512u; + +shared float s_key[512]; +shared uint s_block[512]; + +void main() +{ + uint tid = gl_LocalInvocationID.x; + + uint count = pc.header.y; + uint blockCount = (count + BLOCK_SIZE - 1u) / BLOCK_SIZE; + blockCount = min(blockCount, MAX_BLOCKS); + + float key = 1e20; + if (tid < blockCount) + { + uint blockStart = pc.header.x + tid * BLOCK_SIZE; + uint localCount = min(BLOCK_SIZE, count - tid * BLOCK_SIZE); + + float maxDepth = -1e20; + for (uint i = 0u; i < localCount; ++i) + { + uint idx = blockStart + i; + vec4 viewPos = pc.view * vec4(pool.particles[idx].pos_age.xyz, 1.0); + float depth = -viewPos.z; + maxDepth = max(maxDepth, depth); + } + + // Sort ascending by -depth => farthest first. + key = -maxDepth; + } + + s_key[tid] = key; + s_block[tid] = tid; + barrier(); + + // Bitonic sort ascending for 512 elements. + for (uint k = 2u; k <= MAX_BLOCKS; k <<= 1u) + { + for (uint j = (k >> 1u); j > 0u; j >>= 1u) + { + uint ixj = tid ^ j; + if (ixj > tid) + { + bool ascending = ((tid & k) == 0u); + + float a = s_key[tid]; + float b = s_key[ixj]; + uint ai = s_block[tid]; + uint bi = s_block[ixj]; + + bool swap = (ascending && (a > b)) || (!ascending && (a < b)); + if (swap) + { + s_key[tid] = b; + s_key[ixj] = a; + s_block[tid] = bi; + s_block[ixj] = ai; + } + } + barrier(); + } + } + + if (tid < blockCount) + { + outBlocks.blocks[tid] = s_block[tid]; + } +} + diff --git a/src/core/engine.cpp b/src/core/engine.cpp index 1071220..6f25973 100644 --- a/src/core/engine.cpp +++ b/src/core/engine.cpp @@ -1127,6 +1127,16 @@ void VulkanEngine::draw() _textureCache->pumpLoads(*_resourceManager, get_current_frame()); } + // Allow passes to enqueue texture/image uploads before the upload pass snapshot. + // Particles use this to preload flipbooks/noise referenced by systems. + if (_renderPassManager) + { + if (auto *particles = _renderPassManager->getPass()) + { + particles->preload_needed_textures(); + } + } + _resourceManager->register_upload_pass(*_renderGraph, get_current_frame()); ImGuiPass *imguiPass = nullptr; @@ -1193,7 +1203,7 @@ void VulkanEngine::draw() if (auto *particles = _renderPassManager->getPass()) { - particles->register_graph(_renderGraph.get(), hdrTarget, hDepth); + particles->register_graph(_renderGraph.get(), hdrTarget, hDepth, hGBufferPosition); } if (auto *transparent = _renderPassManager->getPass()) diff --git a/src/core/engine_ui.cpp b/src/core/engine_ui.cpp index 69997fb..192c924 100644 --- a/src/core/engine_ui.cpp +++ b/src/core/engine_ui.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include "mesh_bvh.h" @@ -329,6 +330,31 @@ namespace selected = std::clamp(selected, 0, (int)systems.size() - 1); auto &s = systems[(size_t)selected]; + static std::vector vfxKtx2; + auto refresh_vfx_list = [&]() { + vfxKtx2.clear(); + vfxKtx2.push_back(std::string{}); // None + if (!eng || !eng->_assetManager) return; + const auto &paths = eng->_assetManager->paths(); + if (paths.assets.empty()) return; + std::error_code ec; + std::filesystem::path vfxDir = paths.assets / "vfx"; + if (!std::filesystem::exists(vfxDir, ec) || ec) return; + for (const auto &entry : std::filesystem::directory_iterator(vfxDir, ec)) + { + if (ec) break; + if (!entry.is_regular_file(ec) || ec) continue; + const auto p = entry.path(); + if (p.extension() != ".ktx2" && p.extension() != ".KTX2") continue; + vfxKtx2.push_back(std::string("vfx/") + p.filename().string()); + } + std::sort(vfxKtx2.begin() + 1, vfxKtx2.end()); + }; + if (vfxKtx2.empty()) + { + refresh_vfx_list(); + } + ImGui::Separator(); ImGui::Text("Selected: id=%u base=%u count=%u", s.id, s.base, s.count); @@ -347,7 +373,7 @@ namespace return; } - const char *blendItems[] = {"Additive", "Alpha (unsorted)"}; + const char *blendItems[] = {"Additive", "Alpha (block-sorted)"}; int blend = (s.blend == ParticlePass::BlendMode::Alpha) ? 1 : 0; if (ImGui::Combo("Blend", &blend, blendItems, 2)) { @@ -387,6 +413,72 @@ namespace ImGui::SliderFloat("Drag", &s.params.drag, 0.0f, 10.0f, "%.3f"); ImGui::SliderFloat("Gravity (m/s^2)", &s.params.gravity, 0.0f, 30.0f, "%.2f"); + ImGui::Separator(); + ImGui::TextUnformatted("Rendering"); + ImGui::SliderFloat("Soft Depth (m)", &s.params.soft_depth_distance, 0.0f, 2.0f, "%.3f"); + + if (ImGui::Button("Refresh VFX List")) + { + refresh_vfx_list(); + } + ImGui::SameLine(); + if (ImGui::Button("Use Flame Defaults")) + { + s.flipbook_texture = "vfx/flame.ktx2"; + s.noise_texture = "vfx/simplex.ktx2"; + s.params.flipbook_cols = 16; + s.params.flipbook_rows = 4; + s.params.flipbook_fps = 30.0f; + s.params.flipbook_intensity = 1.0f; + s.params.noise_scale = 6.0f; + s.params.noise_strength = 0.05f; + s.params.noise_scroll = glm::vec2(0.0f, 0.0f); + pass->preload_vfx_texture(s.flipbook_texture); + pass->preload_vfx_texture(s.noise_texture); + } + + auto combo_vfx = [&](const char *label, std::string &path) { + const char *preview = path.empty() ? "None" : path.c_str(); + if (ImGui::BeginCombo(label, preview)) + { + for (const auto &opt : vfxKtx2) + { + const bool isNone = opt.empty(); + const bool isSelected = (path == opt) || (path.empty() && isNone); + const char *name = isNone ? "None" : opt.c_str(); + if (ImGui::Selectable(name, isSelected)) + { + path = opt; + if (!path.empty()) + { + pass->preload_vfx_texture(path); + } + } + if (isSelected) ImGui::SetItemDefaultFocus(); + } + ImGui::EndCombo(); + } + }; + + ImGui::Separator(); + ImGui::TextUnformatted("Flipbook"); + combo_vfx("Flipbook Texture", s.flipbook_texture); + int cols = (int)s.params.flipbook_cols; + int rows = (int)s.params.flipbook_rows; + cols = std::max(cols, 1); + rows = std::max(rows, 1); + if (ImGui::InputInt("Flipbook Cols", &cols)) s.params.flipbook_cols = (uint32_t)std::max(cols, 1); + if (ImGui::InputInt("Flipbook Rows", &rows)) s.params.flipbook_rows = (uint32_t)std::max(rows, 1); + ImGui::SliderFloat("Flipbook FPS", &s.params.flipbook_fps, 0.0f, 120.0f, "%.1f"); + ImGui::SliderFloat("Flipbook Intensity", &s.params.flipbook_intensity, 0.0f, 8.0f, "%.3f"); + + ImGui::Separator(); + ImGui::TextUnformatted("Noise"); + combo_vfx("Noise Texture", s.noise_texture); + ImGui::SliderFloat("Noise Scale", &s.params.noise_scale, 0.0f, 32.0f, "%.3f"); + ImGui::SliderFloat("Noise Strength", &s.params.noise_strength, 0.0f, 1.0f, "%.3f"); + ImGui::InputFloat2("Noise Scroll", reinterpret_cast(&s.params.noise_scroll)); + ImGui::Separator(); ImGui::TextUnformatted("Color"); ImGui::ColorEdit4("Tint", reinterpret_cast(&s.params.color), ImGuiColorEditFlags_Float); diff --git a/src/render/passes/particles.cpp b/src/render/passes/particles.cpp index 4fba737..21924d1 100644 --- a/src/render/passes/particles.cpp +++ b/src/render/passes/particles.cpp @@ -2,6 +2,7 @@ #include "compute/vk_compute.h" #include "core/assets/manager.h" +#include #include "core/context.h" #include "core/descriptor/descriptors.h" #include "core/descriptor/manager.h" @@ -10,6 +11,7 @@ #include "core/device/swapchain.h" #include "core/frame/resources.h" #include "core/pipeline/manager.h" +#include "core/pipeline/sampler.h" #include "render/graph/graph.h" #include "render/pipelines.h" #include "scene/vk_scene.h" @@ -30,6 +32,9 @@ namespace static_assert(sizeof(ParticleGPU) == 64); constexpr uint32_t k_local_size_x = 256; + constexpr uint32_t k_block_size = 256; + constexpr uint32_t k_max_blocks = ParticlePass::k_max_particles / k_block_size; + static_assert(k_max_blocks == 512); struct ParticleUpdatePushConstants { @@ -44,6 +49,44 @@ namespace }; static_assert(sizeof(ParticleUpdatePushConstants) == 128); + struct ParticleSortPushConstants + { + glm::uvec4 header; // x=base, y=count, z/w unused + glm::mat4 view; + }; + static_assert(sizeof(ParticleSortPushConstants) == 80); + + struct ParticleBuildIndicesPushConstants + { + glm::uvec4 header; // x=base, y=count, z=flags (bit0=identity), w unused + }; + static_assert(sizeof(ParticleBuildIndicesPushConstants) == 16); + + struct ParticleRenderPushConstants + { + glm::vec4 screen; // x=invW, y=invH, z=softDepthDistance, w=timeSeconds + glm::vec4 flipbook; // x=cols, y=rows, z=fps, w=intensity + glm::vec4 noise; // x=scale, y=strength, z=scrollX, w=scrollY + }; + static_assert(sizeof(ParticleRenderPushConstants) == 48); + + static void cmd_compute_memory_barrier(VkCommandBuffer cmd, VkAccessFlags2 srcAccess, VkAccessFlags2 dstAccess) + { + VkMemoryBarrier2 mb{}; + mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2; + mb.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; + mb.srcAccessMask = srcAccess; + mb.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; + mb.dstAccessMask = dstAccess; + + VkDependencyInfo dep{}; + dep.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; + dep.memoryBarrierCount = 1; + dep.pMemoryBarriers = &mb; + + vkCmdPipelineBarrier2(cmd, &dep); + } + static glm::vec3 safe_normalize(const glm::vec3 &v, const glm::vec3 &fallback) { const float len2 = glm::dot(v, v); @@ -61,11 +104,111 @@ namespace } } +void ParticlePass::preload_vfx_texture(const std::string &assetName) +{ + (void)get_or_load_vfx_texture(assetName); +} + +void ParticlePass::preload_needed_textures() +{ + if (!_context) return; + for (const auto &sys : _systems) + { + if (!sys.flipbook_texture.empty()) + { + (void)get_or_load_vfx_texture(sys.flipbook_texture); + } + if (!sys.noise_texture.empty()) + { + (void)get_or_load_vfx_texture(sys.noise_texture); + } + } +} + +AllocatedImage *ParticlePass::find_vfx_texture(std::string_view assetName) +{ + if (assetName.empty() || !_context || !_context->getAssets()) + { + return nullptr; + } + + std::string resolved = _context->getAssets()->assetPath(assetName); + if (resolved.empty()) + { + return nullptr; + } + + auto it = _vfx_texture_lookup.find(resolved); + if (it == _vfx_texture_lookup.end()) return nullptr; + const uint32_t idx = it->second; + if (idx >= _vfx_textures.size()) return nullptr; + return &_vfx_textures[idx].image; +} + +AllocatedImage *ParticlePass::get_or_load_vfx_texture(std::string_view assetName) +{ + if (assetName.empty() || !_context || !_context->getAssets() || !_context->getResources()) + { + return nullptr; + } + + std::string resolved = _context->getAssets()->assetPath(assetName); + if (resolved.empty()) + { + return nullptr; + } + + auto it = _vfx_texture_lookup.find(resolved); + if (it != _vfx_texture_lookup.end()) + { + const uint32_t idx = it->second; + if (idx < _vfx_textures.size()) + { + return &_vfx_textures[idx].image; + } + } + + if (_vfx_texture_failures.find(resolved) != _vfx_texture_failures.end()) + { + return nullptr; + } + + ktxutil::Ktx2D ktx{}; + if (!ktxutil::load_ktx2_2d(resolved.c_str(), ktx)) + { + _vfx_texture_failures.insert(resolved); + return nullptr; + } + + AllocatedImage img = _context->getResources()->create_image_compressed_layers( + ktx.bytes.data(), + ktx.bytes.size(), + ktx.fmt, + ktx.mipLevels, + 1, + ktx.copies, + VK_IMAGE_USAGE_SAMPLED_BIT); + + if (img.image == VK_NULL_HANDLE || img.imageView == VK_NULL_HANDLE) + { + _vfx_texture_failures.insert(resolved); + return nullptr; + } + + const uint32_t idx = static_cast(_vfx_textures.size()); + VfxTexture rec{}; + rec.resolvedPath = resolved; + rec.image = img; + _vfx_textures.push_back(std::move(rec)); + _vfx_texture_lookup.emplace(resolved, idx); + return &_vfx_textures[idx].image; +} + void ParticlePass::init(EngineContext *context) { _context = context; if (!_context || !_context->getDevice() || !_context->getResources() || !_context->getAssets() || - !_context->pipelines || !_context->getDescriptorLayouts()) + !_context->pipelines || !_context->getDescriptorLayouts() || !_context->getSamplers()) { return; } @@ -79,6 +222,36 @@ void ParticlePass::init(EngineContext *context) VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY); + _draw_indices_size = VkDeviceSize(sizeof(uint32_t)) * VkDeviceSize(k_max_particles); + _draw_indices = _context->getResources()->create_buffer( + _draw_indices_size, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VMA_MEMORY_USAGE_GPU_ONLY); + + _sorted_blocks_size = VkDeviceSize(sizeof(uint32_t)) * VkDeviceSize(k_max_blocks); + _sorted_blocks = _context->getResources()->create_buffer( + _sorted_blocks_size, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VMA_MEMORY_USAGE_GPU_ONLY); + + // Fallback 1x1 textures (uncompressed RGBA8). + // - flipbook: white (no change) + // - noise: neutral 0.5 (no UV distortion when remapped to [-1,1]) + { + const uint32_t white = 0xFFFFFFFFu; + _fallback_flipbook = _context->getResources()->create_image(&white, + VkExtent3D{1, 1, 1}, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_SAMPLED_BIT, + false); + const uint32_t neutral = 0x80808080u; + _fallback_noise = _context->getResources()->create_image(&neutral, + VkExtent3D{1, 1, 1}, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_SAMPLED_BIT, + false); + } + // Zero the pool once so all particles start "dead" and get respawned deterministically by the compute update. if (_particle_pool.buffer != VK_NULL_HANDLE) { @@ -91,13 +264,37 @@ void ParticlePass::init(EngineContext *context) VkDevice device = _context->getDevice()->device(); - // Set=1 layout for graphics: particle pool SSBO. + // Set=1 layout for graphics: particle pool SSBO + optional draw indices indirection. { DescriptorLayoutBuilder builder; builder.add_binding(0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + builder.add_binding(1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); _particle_set_layout = builder.build(device, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT); } + // Set=2 layout for graphics: sampled G-buffer position (for soft particles). + { + DescriptorLayoutBuilder builder; + builder.add_binding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + _input_set_layout = builder.build( + device, + VK_SHADER_STAGE_FRAGMENT_BIT, + nullptr, + VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT); + } + + // Set=3 layout for graphics: flipbook + noise textures. + { + DescriptorLayoutBuilder builder; + builder.add_binding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + builder.add_binding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + _vfx_set_layout = builder.build( + device, + VK_SHADER_STAGE_FRAGMENT_BIT, + nullptr, + VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT); + } + // Compute update pipeline + instance. { ComputePipelineCreateInfo ci{}; @@ -112,6 +309,33 @@ void ParticlePass::init(EngineContext *context) VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0); } + // Compute block sort + index build pipelines (alpha block-sorting). + { + ComputePipelineCreateInfo sortCi{}; + sortCi.shaderPath = _context->getAssets()->shaderPath("particles_sort_blocks.comp.spv"); + sortCi.descriptorTypes = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER}; + sortCi.pushConstantSize = sizeof(ParticleSortPushConstants); + sortCi.pushConstantStages = VK_SHADER_STAGE_COMPUTE_BIT; + _context->pipelines->createComputePipeline("particles.sort_blocks", sortCi); + _context->pipelines->createComputeInstance("particles.sort_blocks", "particles.sort_blocks"); + _context->pipelines->setComputeInstanceBuffer("particles.sort_blocks", 0, _particle_pool.buffer, _particle_pool_size, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0); + _context->pipelines->setComputeInstanceBuffer("particles.sort_blocks", 1, _sorted_blocks.buffer, _sorted_blocks_size, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0); + + ComputePipelineCreateInfo buildCi{}; + buildCi.shaderPath = _context->getAssets()->shaderPath("particles_build_indices.comp.spv"); + buildCi.descriptorTypes = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER}; + buildCi.pushConstantSize = sizeof(ParticleBuildIndicesPushConstants); + buildCi.pushConstantStages = VK_SHADER_STAGE_COMPUTE_BIT; + _context->pipelines->createComputePipeline("particles.build_indices", buildCi); + _context->pipelines->createComputeInstance("particles.build_indices", "particles.build_indices"); + _context->pipelines->setComputeInstanceBuffer("particles.build_indices", 0, _sorted_blocks.buffer, _sorted_blocks_size, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0); + _context->pipelines->setComputeInstanceBuffer("particles.build_indices", 1, _draw_indices.buffer, _draw_indices_size, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0); + } + // Graphics pipelines for render (additive + optional alpha). { const std::string vert = _context->getAssets()->shaderPath("particles.vert.spv"); @@ -123,8 +347,16 @@ void ParticlePass::init(EngineContext *context) base.setLayouts = { _context->getDescriptorLayouts()->gpuSceneDataLayout(), // set = 0 _particle_set_layout, // set = 1 + _input_set_layout, // set = 2 + _vfx_set_layout, // set = 3 }; + VkPushConstantRange pcr{}; + pcr.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + pcr.offset = 0; + pcr.size = sizeof(ParticleRenderPushConstants); + base.pushConstants = { pcr }; + base.configure = [this](PipelineBuilder &b) { b.set_input_topology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); b.set_polygon_mode(VK_POLYGON_MODE_FILL); @@ -156,6 +388,17 @@ void ParticlePass::init(EngineContext *context) void ParticlePass::cleanup() { + if (_context && _context->pipelines) + { + _context->pipelines->destroyComputeInstance("particles.update"); + _context->pipelines->destroyComputeInstance("particles.sort_blocks"); + _context->pipelines->destroyComputeInstance("particles.build_indices"); + + _context->pipelines->destroyComputePipeline("particles.update"); + _context->pipelines->destroyComputePipeline("particles.sort_blocks"); + _context->pipelines->destroyComputePipeline("particles.build_indices"); + } + if (_context && _context->getDevice()) { if (_particle_set_layout != VK_NULL_HANDLE) @@ -163,15 +406,57 @@ void ParticlePass::cleanup() vkDestroyDescriptorSetLayout(_context->getDevice()->device(), _particle_set_layout, nullptr); _particle_set_layout = VK_NULL_HANDLE; } + if (_input_set_layout != VK_NULL_HANDLE) + { + vkDestroyDescriptorSetLayout(_context->getDevice()->device(), _input_set_layout, nullptr); + _input_set_layout = VK_NULL_HANDLE; + } + if (_vfx_set_layout != VK_NULL_HANDLE) + { + vkDestroyDescriptorSetLayout(_context->getDevice()->device(), _vfx_set_layout, nullptr); + _vfx_set_layout = VK_NULL_HANDLE; + } } if (_context && _context->getResources()) { + for (auto &t : _vfx_textures) + { + if (t.image.image != VK_NULL_HANDLE) + { + _context->getResources()->destroy_image(t.image); + } + } + _vfx_textures.clear(); + _vfx_texture_lookup.clear(); + _vfx_texture_failures.clear(); + + if (_fallback_flipbook.image != VK_NULL_HANDLE) + { + _context->getResources()->destroy_image(_fallback_flipbook); + _fallback_flipbook = {}; + } + if (_fallback_noise.image != VK_NULL_HANDLE) + { + _context->getResources()->destroy_image(_fallback_noise); + _fallback_noise = {}; + } + if (_particle_pool.buffer != VK_NULL_HANDLE) { _context->getResources()->destroy_buffer(_particle_pool); _particle_pool = {}; } + if (_draw_indices.buffer != VK_NULL_HANDLE) + { + _context->getResources()->destroy_buffer(_draw_indices); + _draw_indices = {}; + } + if (_sorted_blocks.buffer != VK_NULL_HANDLE) + { + _context->getResources()->destroy_buffer(_sorted_blocks); + _sorted_blocks = {}; + } } _systems.clear(); @@ -278,6 +563,9 @@ uint32_t ParticlePass::create_system(uint32_t count) s.params = Params{}; _systems.push_back(s); + // Load default flipbook/noise if configured. Happens during UI before draw(), so uploads can be captured this frame. + preload_vfx_texture(_systems.back().flipbook_texture); + preload_vfx_texture(_systems.back().noise_texture); return s.id; } @@ -342,10 +630,13 @@ bool ParticlePass::resize_system(uint32_t id, uint32_t new_count) return true; } -void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, RGImageHandle depthHandle) +void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, RGImageHandle depthHandle, + RGImageHandle gbufferPosition) { if (!graph || !_context || !_context->pipelines || _particle_pool.buffer == VK_NULL_HANDLE || - !hdrTarget.valid() || !depthHandle.valid()) + _draw_indices.buffer == VK_NULL_HANDLE || _sorted_blocks.buffer == VK_NULL_HANDLE || + _particle_set_layout == VK_NULL_HANDLE || _input_set_layout == VK_NULL_HANDLE || _vfx_set_layout == VK_NULL_HANDLE || + !hdrTarget.valid() || !depthHandle.valid() || !gbufferPosition.valid()) { return; } @@ -396,12 +687,18 @@ void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, R VkBuffer pool = _particle_pool.buffer; VkDeviceSize poolSize = _particle_pool_size; + VkBuffer indices = _draw_indices.buffer; + VkDeviceSize indicesSize = _draw_indices_size; + VkBuffer sortedBlocks = _sorted_blocks.buffer; + VkDeviceSize sortedBlocksSize = _sorted_blocks_size; graph->add_pass( "Particles.Update", RGPassType::Compute, - [pool, poolSize](RGPassBuilder &builder, EngineContext *) { + [pool, poolSize, indices, indicesSize, sortedBlocks, sortedBlocksSize](RGPassBuilder &builder, EngineContext *) { builder.write_buffer(pool, RGBufferUsage::StorageReadWrite, poolSize, "particles.pool"); + builder.write_buffer(indices, RGBufferUsage::StorageReadWrite, indicesSize, "particles.indices"); + builder.write_buffer(sortedBlocks, RGBufferUsage::StorageReadWrite, sortedBlocksSize, "particles.sorted_blocks"); }, [this](VkCommandBuffer cmd, const RGPassResources &, EngineContext *ctx) { EngineContext *ctxLocal = ctx ? ctx : _context; @@ -457,6 +754,54 @@ void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, R ctxLocal->pipelines->dispatchComputeInstance(cmd, "particles.update", di); + if (sys.blend == BlendMode::Alpha) + { + // Ensure particle writes are visible before block depth reads. + cmd_compute_memory_barrier(cmd, + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + VK_ACCESS_2_SHADER_STORAGE_READ_BIT); + + ParticleSortPushConstants spc{}; + spc.header = glm::uvec4(sys.base, sys.count, 0u, 0u); + spc.view = ctxLocal->getSceneData().view; + + ComputeDispatchInfo sdi{}; + sdi.groupCountX = 1; + sdi.groupCountY = 1; + sdi.groupCountZ = 1; + sdi.pushConstants = &spc; + sdi.pushConstantSize = sizeof(spc); + ctxLocal->pipelines->dispatchComputeInstance(cmd, "particles.sort_blocks", sdi); + + cmd_compute_memory_barrier(cmd, + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + VK_ACCESS_2_SHADER_STORAGE_READ_BIT); + + ParticleBuildIndicesPushConstants ipc{}; + ipc.header = glm::uvec4(sys.base, sys.count, 0u /*flags*/, 0u); + + ComputeDispatchInfo idi{}; + idi.groupCountX = ComputeManager::calculateGroupCount(sys.count, k_local_size_x); + idi.groupCountY = 1; + idi.groupCountZ = 1; + idi.pushConstants = &ipc; + idi.pushConstantSize = sizeof(ipc); + ctxLocal->pipelines->dispatchComputeInstance(cmd, "particles.build_indices", idi); + } + else + { + ParticleBuildIndicesPushConstants ipc{}; + ipc.header = glm::uvec4(sys.base, sys.count, 1u /*identity*/, 0u); + + ComputeDispatchInfo idi{}; + idi.groupCountX = ComputeManager::calculateGroupCount(sys.count, k_local_size_x); + idi.groupCountY = 1; + idi.groupCountZ = 1; + idi.pushConstants = &ipc; + idi.pushConstantSize = sizeof(ipc); + ctxLocal->pipelines->dispatchComputeInstance(cmd, "particles.build_indices", idi); + } + sys.reset = false; } }); @@ -464,12 +809,14 @@ void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, R graph->add_pass( "Particles.Render", RGPassType::Graphics, - [pool, poolSize, hdrTarget, depthHandle](RGPassBuilder &builder, EngineContext *) { + [pool, poolSize, indices, indicesSize, hdrTarget, depthHandle, gbufferPosition](RGPassBuilder &builder, EngineContext *) { builder.read_buffer(pool, RGBufferUsage::StorageRead, poolSize, "particles.pool"); + builder.read_buffer(indices, RGBufferUsage::StorageRead, indicesSize, "particles.indices"); + builder.read(gbufferPosition, RGImageUsage::SampledFragment); builder.write_color(hdrTarget); builder.write_depth(depthHandle, false /*load existing depth*/); }, - [this](VkCommandBuffer cmd, const RGPassResources &, EngineContext *ctx) { + [this, gbufferPosition](VkCommandBuffer cmd, const RGPassResources &res, EngineContext *ctx) { EngineContext *ctxLocal = ctx ? ctx : _context; if (!ctxLocal || !ctxLocal->currentFrame) return; @@ -477,7 +824,15 @@ void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, R DeviceManager *dev = ctxLocal->getDevice(); DescriptorManager *layouts = ctxLocal->getDescriptorLayouts(); PipelineManager *pipes = ctxLocal->pipelines; - if (!rm || !dev || !layouts || !pipes || _particle_set_layout == VK_NULL_HANDLE) return; + SamplerManager *samplers = ctxLocal->getSamplers(); + if (!rm || !dev || !layouts || !pipes || !samplers || + _particle_set_layout == VK_NULL_HANDLE || _input_set_layout == VK_NULL_HANDLE) + { + return; + } + + VkImageView posView = res.image_view(gbufferPosition); + if (posView == VK_NULL_HANDLE) return; // Per-frame SceneData UBO (set=0 binding=0) AllocatedBuffer sceneBuf = rm->create_buffer(sizeof(GPUSceneData), @@ -504,9 +859,21 @@ void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, R { DescriptorWriter w; w.write_buffer(0, _particle_pool.buffer, _particle_pool_size, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + w.write_buffer(1, _draw_indices.buffer, _draw_indices_size, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); w.update_set(dev->device(), particleSet); } + // Inputs (set=2): G-buffer position texture for soft particles. + VkDescriptorSet inputSet = ctxLocal->currentFrame->_frameDescriptors.allocate( + dev->device(), _input_set_layout); + { + DescriptorWriter w; + w.write_image(0, posView, samplers->defaultNearest(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + w.update_set(dev->device(), inputSet); + } + VkExtent2D extent = ctxLocal->getDrawExtent(); VkViewport vp{0.0f, 0.0f, float(extent.width), float(extent.height), 0.0f, 1.0f}; VkRect2D sc{{0, 0}, extent}; @@ -524,6 +891,7 @@ void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, R vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, 1, &globalSet, 0, nullptr); vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 1, 1, &particleSet, 0, nullptr); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 2, 1, &inputSet, 0, nullptr); boundBlend = blend; hasBound = true; return true; @@ -537,6 +905,61 @@ void ParticlePass::register_graph(RenderGraph *graph, RGImageHandle hdrTarget, R if (!bind_pipeline(sys.blend)) continue; } + // VFX textures (set=3): flipbook + noise (per-system). + VkDescriptorSet vfxSet = ctxLocal->currentFrame->_frameDescriptors.allocate( + dev->device(), _vfx_set_layout); + { + AllocatedImage *flipImg = find_vfx_texture(sys.flipbook_texture); + if (!flipImg || flipImg->imageView == VK_NULL_HANDLE) + { + flipImg = &_fallback_flipbook; + } + AllocatedImage *noiseImg = find_vfx_texture(sys.noise_texture); + if (!noiseImg || noiseImg->imageView == VK_NULL_HANDLE) + { + noiseImg = &_fallback_noise; + } + + DescriptorWriter w; + w.write_image(0, flipImg->imageView, samplers->defaultLinear(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + w.write_image(1, noiseImg->imageView, samplers->defaultLinear(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + w.update_set(dev->device(), vfxSet); + } + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 3, 1, &vfxSet, 0, nullptr); + + ParticleRenderPushConstants pc{}; + const float invW = (extent.width > 0) ? (1.0f / float(extent.width)) : 0.0f; + const float invH = (extent.height > 0) ? (1.0f / float(extent.height)) : 0.0f; + const float softDist = clamp_nonnegative(sys.params.soft_depth_distance); + pc.screen = glm::vec4(invW, invH, softDist, _time_sec); + + const uint32_t colsU = std::max(sys.params.flipbook_cols, 1u); + const uint32_t rowsU = std::max(sys.params.flipbook_rows, 1u); + float fps = sys.params.flipbook_fps; + if (!std::isfinite(fps)) fps = 0.0f; + fps = std::clamp(fps, 0.0f, 240.0f); + float intensity = sys.params.flipbook_intensity; + if (!std::isfinite(intensity)) intensity = 0.0f; + intensity = std::max(0.0f, intensity); + pc.flipbook = glm::vec4(float(colsU), float(rowsU), fps, intensity); + + float noiseScale = sys.params.noise_scale; + if (!std::isfinite(noiseScale)) noiseScale = 0.0f; + noiseScale = std::max(0.0f, noiseScale); + float noiseStrength = sys.params.noise_strength; + if (!std::isfinite(noiseStrength)) noiseStrength = 0.0f; + noiseStrength = std::max(0.0f, noiseStrength); + glm::vec2 scroll = sys.params.noise_scroll; + if (!std::isfinite(scroll.x)) scroll.x = 0.0f; + if (!std::isfinite(scroll.y)) scroll.y = 0.0f; + pc.noise = glm::vec4(noiseScale, noiseStrength, scroll.x, scroll.y); + + vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(pc), &pc); + // Instanced quad draw. gl_InstanceIndex includes firstInstance, so it becomes the particle index. vkCmdDraw(cmd, 6, sys.count, 0, sys.base); if (ctxLocal->stats) diff --git a/src/render/passes/particles.h b/src/render/passes/particles.h index b432711..888b70b 100644 --- a/src/render/passes/particles.h +++ b/src/render/passes/particles.h @@ -5,6 +5,9 @@ #include "render/renderpass.h" #include +#include +#include +#include #include class RenderGraph; @@ -41,6 +44,20 @@ public: float gravity{0.0f}; // positive pulls down -Y in local space glm::vec4 color{1.0f, 0.5f, 0.1f, 1.0f}; + + // Fade particles near opaque geometry intersections (0 disables). + float soft_depth_distance{0.15f}; + + // Flipbook sampling (atlas layout and animation). + uint32_t flipbook_cols{16}; + uint32_t flipbook_rows{4}; + float flipbook_fps{30.0f}; + float flipbook_intensity{1.0f}; + + // Noise UV distortion + float noise_scale{6.0f}; + float noise_strength{0.05f}; + glm::vec2 noise_scroll{0.0f, 0.0f}; }; struct System @@ -52,6 +69,11 @@ public: bool reset{true}; BlendMode blend{BlendMode::Additive}; Params params{}; + + // Asset-relative names (resolved via AssetManager::assetPath). + // Empty disables (falls back to procedural sprite / no distortion). + std::string flipbook_texture{"vfx/flame.ktx2"}; + std::string noise_texture{"vfx/simplex.ktx2"}; }; void init(EngineContext *context) override; @@ -59,7 +81,8 @@ public: void execute(VkCommandBuffer cmd) override; const char *getName() const override { return "Particles"; } - void register_graph(RenderGraph *graph, RGImageHandle hdrTarget, RGImageHandle depthHandle); + void register_graph(RenderGraph *graph, RGImageHandle hdrTarget, RGImageHandle depthHandle, + RGImageHandle gbufferPosition); uint32_t create_system(uint32_t count); bool destroy_system(uint32_t id); @@ -71,6 +94,11 @@ public: uint32_t allocated_particles() const; uint32_t free_particles() const; + // Preload a VFX texture (e.g. "vfx/flame.ktx2"). Safe to call from UI. + void preload_vfx_texture(const std::string &assetName); + // Preload all textures referenced by current systems. Call once per frame before ResourceUploads pass is registered. + void preload_needed_textures(); + private: struct FreeRange { @@ -89,7 +117,29 @@ private: AllocatedBuffer _particle_pool{}; VkDeviceSize _particle_pool_size = 0; + AllocatedBuffer _draw_indices{}; + VkDeviceSize _draw_indices_size = 0; + + AllocatedBuffer _sorted_blocks{}; + VkDeviceSize _sorted_blocks_size = 0; + VkDescriptorSetLayout _particle_set_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout _input_set_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout _vfx_set_layout = VK_NULL_HANDLE; + + struct VfxTexture + { + std::string resolvedPath; + AllocatedImage image{}; + }; + std::vector _vfx_textures; + std::unordered_map _vfx_texture_lookup; + std::unordered_set _vfx_texture_failures; + AllocatedImage _fallback_flipbook{}; + AllocatedImage _fallback_noise{}; + + AllocatedImage *get_or_load_vfx_texture(std::string_view assetName); + AllocatedImage *find_vfx_texture(std::string_view assetName); uint32_t _next_system_id = 1; std::vector _systems; @@ -102,4 +152,3 @@ private: WorldVec3 _prev_origin_world{0.0, 0.0, 0.0}; bool _has_prev_origin = false; }; -