From d6216b20fc7ed13ebd89d8d5dea15dba18885512 Mon Sep 17 00:00:00 2001 From: hydrogendeuteride Date: Thu, 25 Dec 2025 22:09:02 +0900 Subject: [PATCH] ADD: Docs and shader optim --- docs/ParticleSystem.md | 366 +++++++++++++++++++++ docs/README.md | 189 +++++++++++ docs/Volumetrics.md | 487 ++++++++++++++++++++++++++++ shaders/background_env.frag | 9 +- shaders/deferred_lighting.frag | 13 +- shaders/deferred_lighting_nort.frag | 13 +- shaders/gbuffer.frag | 1 + shaders/mesh.frag | 13 +- shaders/mesh.vert | 5 +- shaders/ssr.frag | 13 +- shaders/ssr_rt.frag | 15 +- shaders/tonemap.frag | 57 ++-- src/core/types.h | 8 + src/render/passes/geometry.cpp | 6 + src/render/passes/shadow.cpp | 37 ++- src/render/passes/transparent.cpp | 6 + 16 files changed, 1178 insertions(+), 60 deletions(-) create mode 100644 docs/ParticleSystem.md create mode 100644 docs/README.md create mode 100644 docs/Volumetrics.md diff --git a/docs/ParticleSystem.md b/docs/ParticleSystem.md new file mode 100644 index 0000000..7d31082 --- /dev/null +++ b/docs/ParticleSystem.md @@ -0,0 +1,366 @@ +# Particle System + +The particle system provides GPU-accelerated particle simulation and rendering with support for flipbook animation, soft particles, and alpha/additive blending. 
+ +## Architecture Overview + +The system is implemented across multiple components: + +- **ParticlePass** (`src/render/passes/particles.h/.cpp`) — Render pass managing particle pools, compute pipelines, and graphics pipelines +- **GameAPI** (`src/core/game_api.h`) — High-level API for creating and controlling particle systems +- **Shaders** — Compute and graphics shaders for simulation and rendering + - `shaders/particles_update.comp` — Per-particle physics simulation + - `shaders/particles_sort_blocks.comp` — Block-level depth sorting for alpha blending + - `shaders/particles_build_indices.comp` — Build draw indices from sorted blocks + - `shaders/particles.vert/.frag` — Vertex/fragment shaders for rendering + +## Key Features + +- **Global particle pool**: Up to 128K particles (`k_max_particles = 128 * 1024`) shared across all systems +- **GPU simulation**: Fully GPU-driven via compute shaders (no CPU readback) +- **Flipbook animation**: Supports sprite sheet animation with configurable atlas layout and FPS +- **Soft particles**: Depth-aware fading near opaque geometry +- **Blend modes**: Additive (fire, sparks) and Alpha (smoke, debris) with automatic depth sorting +- **Noise distortion**: Optional UV distortion for organic motion +- **Floating-origin stable**: Automatically adjusts particle positions when world origin shifts + +## Particle Data Layout + +Each particle is represented as 64 bytes (4 × vec4) on the GPU: + +```glsl +struct Particle +{ + vec4 pos_age; // xyz = local position, w = remaining life (seconds) + vec4 vel_life; // xyz = local velocity, w = total lifetime (seconds) + vec4 color; // rgba + vec4 misc; // x=size, y=random seed, z/w=unused +}; +``` + +## Creating Particle Systems + +### Via GameAPI + +```cpp +#include "core/game_api.h" + +GameAPI::Engine api(&engine); + +// Create a particle system with 1024 particles +uint32_t systemId = api.create_particle_system(1024); + +// Configure parameters +GameAPI::ParticleSystem sys = 
api.get_particle_system(systemId); +sys.enabled = true; +sys.reset = true; // Respawn all particles immediately +sys.blendMode = GameAPI::ParticleBlendMode::Additive; + +// Emitter settings +sys.params.emitterPosLocal = glm::vec3(0.0f, 0.0f, 0.0f); +sys.params.spawnRadius = 0.1f; +sys.params.emitterDirLocal = glm::vec3(0.0f, 1.0f, 0.0f); // Upward +sys.params.coneAngleDegrees = 20.0f; + +// Particle properties +sys.params.minSpeed = 2.0f; +sys.params.maxSpeed = 8.0f; +sys.params.minLife = 0.5f; +sys.params.maxLife = 1.5f; +sys.params.minSize = 0.05f; +sys.params.maxSize = 0.15f; + +// Physics +sys.params.drag = 1.0f; +sys.params.gravity = 0.0f; // Positive pulls down -Y in local space + +// Appearance +sys.params.color = glm::vec4(1.0f, 0.5f, 0.1f, 1.0f); // Orange + +// Flipbook animation (16×4 atlas, 30 FPS) +sys.flipbookTexture = "vfx/flame.ktx2"; +sys.params.flipbookCols = 16; +sys.params.flipbookRows = 4; +sys.params.flipbookFps = 30.0f; +sys.params.flipbookIntensity = 1.0f; + +// Noise distortion +sys.noiseTexture = "vfx/simplex.ktx2"; +sys.params.noiseScale = 6.0f; +sys.params.noiseStrength = 0.05f; +sys.params.noiseScroll = glm::vec2(0.0f, 0.0f); + +// Soft particles +sys.params.softDepthDistance = 0.15f; // Fade particles within 0.15 units of geometry + +api.set_particle_system(systemId, sys); +``` + +### Direct API + +```cpp +ParticlePass* particlePass = /* obtain from RenderPassManager */; + +// Create system +uint32_t systemId = particlePass->create_system(1024); + +// Access and modify +auto& systems = particlePass->systems(); +for (auto& sys : systems) +{ + if (sys.id == systemId) + { + sys.enabled = true; + sys.params.color = glm::vec4(1.0f, 0.0f, 0.0f, 1.0f); + break; + } +} +``` + +## Simulation Details + +### Update Pipeline (Compute) + +The `particles_update.comp` shader runs once per frame for each active system: + +1. **Floating-origin correction**: `p.pos_age.xyz -= origin_delta` keeps particles stable when the world origin shifts +2. 
**Respawn check**: Particles that are dead (`age <= 0`), or all particles when the reset flag is set, are respawned with randomized properties +3. **Physics integration**: + - Apply gravity: `vel += vec3(0, -gravity, 0) * dt` + - Apply drag: `vel *= exp(-drag * dt)` + - Integrate position: `pos += vel * dt` +4. **Age decrement**: `age -= dt` + +Random number generation uses a per-particle seed (`misc.y`) combined with system time to ensure deterministic but varied behavior. + +### Cone Emission + +When `coneAngleDegrees > 0`, particles are emitted within a cone: +- Cone axis is `emitterDirLocal` +- Particles are randomly distributed within the cone solid angle +- `coneAngleDegrees = 0` emits in a single direction +- `coneAngleDegrees < 0` emits in all directions (sphere) + +### Spawn Radius + +Particles spawn at `emitterPosLocal ± random_in_sphere(spawnRadius)`. + +## Rendering Pipeline + +### Blend Modes + +**Additive** (`BlendMode::Additive`): +- Source: `VK_BLEND_FACTOR_SRC_ALPHA` +- Dest: `VK_BLEND_FACTOR_ONE` +- No depth sorting required +- Ideal for fire, sparks, energy effects + +**Alpha** (`BlendMode::Alpha`): +- Source: `VK_BLEND_FACTOR_SRC_ALPHA` +- Dest: `VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA` +- Block-level depth sorting (256 particles per block) +- Better for smoke, debris, leaves + +### Alpha Sorting + +For alpha-blended systems: + +1. **Block sorting** (`particles_sort_blocks.comp`): Divides particles into 256-particle blocks, computes average depth per block, sorts blocks back-to-front +2. **Index building** (`particles_build_indices.comp`): Writes sorted particle indices into `_draw_indices` buffer +3. **Rendering**: Vertex shader reads particles via indirection: `Particle p = pool.particles[indices[gl_InstanceIndex]]` + +This provides coarse-grained sorting with minimal compute overhead (512 blocks max).
+ +### Soft Particles + +Fragment shader samples G-buffer depth (`gbufferPosition.w`) and fades particle alpha near intersections: + +```glsl +float sceneDepth = texture(posTex, screenUV).w; +float particleDepth = /* compute from world pos */; +float depthDiff = sceneDepth - particleDepth; +float softFactor = smoothstep(0.0, softDepthDistance, depthDiff); +outColor.a *= softFactor; +``` + +Set `softDepthDistance = 0` to disable. + +### Flipbook Animation + +The fragment shader samples an animated sprite sheet: + +1. Compute frame index: `frameIndex = int(time_sec * flipbookFps) % (flipbookCols * flipbookRows)` +2. Map frame to UV rect: `(col, row) = (frameIndex % cols, frameIndex / cols)` +3. Sample texture: `color = texture(flipbookTex, baseUV * cellSize + cellOffset)` + +### Noise Distortion + +Optional UV distortion using a noise texture: + +```glsl +vec2 noiseUV = uv * noiseScale + noiseScroll * time_sec; +vec2 distortion = (texture(noiseTex, noiseUV).rg - 0.5) * 2.0 * noiseStrength; +vec2 finalUV = uv + distortion; +``` + +## Memory Management + +### Particle Pool Allocation + +The global pool is pre-allocated (128K particles × 64 bytes = 8 MB) and subdivided into ranges: + +- `create_system(count)`: Allocates a contiguous range from `_free_ranges` +- `destroy_system(id)`: Returns range to free list and merges adjacent ranges +- `resize_system(id, new_count)`: Reallocates (may move particles) + +Allocation uses a simple first-fit strategy with automatic coalescing. 
+ +### Texture Caching + +VFX textures (flipbook/noise) are loaded on-demand and cached in `_vfx_textures`: +- `preload_vfx_texture(assetName)`: Explicitly load texture (safe to call from UI) +- `preload_needed_textures()`: Load all textures referenced by active systems (call before ResourceUploads pass) +- Fallback 1×1 textures (`_fallback_flipbook`, `_fallback_noise`) are used when load fails + +## Render Graph Integration + +The particle pass registers into the render graph after lighting and SSR: + +```cpp +void ParticlePass::register_graph(RenderGraph* graph, + RGImageHandle hdrTarget, + RGImageHandle depthHandle, + RGImageHandle gbufferPosition) +{ + graph->add_pass("Particles", RGPassType::Graphics, + [=](RGPassBuilder& b, EngineContext*) { + b.write_color(hdrTarget); // Composite onto HDR + b.read_depth(depthHandle); // Depth test + b.sample_image(gbufferPosition); // Soft particles + }, + [this](VkCommandBuffer cmd, const RGPassResources& res, EngineContext* ctx) { + // 1. Run compute update for each system + // 2. For alpha systems: sort blocks + build indices + // 3. 
Render all systems (additive first, then alpha) + } + ); +} +``` + +## Performance Considerations + +- **Particle count**: 128K global limit; budget carefully across systems +- **Overdraw**: Additive blending is fill-rate intensive; keep particle size and count moderate +- **Sorting cost**: Alpha systems incur compute overhead for block sorting (~512 blocks × 256 particles) +- **Texture bandwidth**: Flipbook textures should be compressed (KTX2) and atlased (16×4 common) +- **Soft particles**: G-buffer read adds bandwidth; disable if depth fading isn't visible + +## Common Presets + +### Fire + +```cpp +sys.blendMode = Additive; +sys.params.color = glm::vec4(1.0f, 0.5f, 0.1f, 1.0f); // Orange +sys.params.gravity = 0.0f; +sys.params.minSpeed = 1.0f; sys.params.maxSpeed = 3.0f; +sys.params.drag = 0.5f; +sys.flipbookTexture = "vfx/flame.ktx2"; +``` + +### Smoke + +```cpp +sys.blendMode = Alpha; +sys.params.color = glm::vec4(0.3f, 0.3f, 0.3f, 0.5f); // Gray, semi-transparent +sys.params.gravity = -2.0f; // Rise upward (negative gravity) +sys.params.drag = 1.5f; // Slow down quickly +sys.params.minSpeed = 0.5f; sys.params.maxSpeed = 2.0f; +sys.noiseTexture = "vfx/simplex.ktx2"; +sys.params.noiseStrength = 0.2f; // Strong distortion +``` + +### Sparks + +```cpp +sys.blendMode = Additive; +sys.params.color = glm::vec4(1.0f, 0.8f, 0.2f, 1.0f); // Bright yellow +sys.params.gravity = 9.8f; // Fall downward +sys.params.drag = 0.1f; +sys.params.minSpeed = 5.0f; sys.params.maxSpeed = 15.0f; +sys.params.minSize = 0.01f; sys.params.maxSize = 0.03f; // Small +sys.flipbookTexture = ""; // Disable flipbook (procedural sprite) +``` + +## Troubleshooting + +**Particles not visible**: +- Ensure `enabled = true` and `particleCount > 0` +- Check `color.a > 0` (fully transparent particles are invisible) +- Verify system is allocated: `api.get_particle_systems()` should list the ID + +**Particles flickering or popping**: +- Set `reset = false` after first frame (reset respawns all particles 
immediately) +- Increase `minLife`/`maxLife` to prevent frequent respawning + +**Performance issues**: +- Reduce total particle count (check `allocated_particles()`) +- Use additive blend for most systems (cheaper than alpha) +- Reduce flipbook texture resolution or mip levels + +**Textures missing**: +- Call `preload_vfx_texture("vfx/texture.ktx2")` before first frame +- Or call `preload_needed_textures()` in engine setup +- Check AssetManager can resolve path: `assetPath("vfx/texture.ktx2")` + +## API Reference + +### ParticlePass + +```cpp +class ParticlePass : public IRenderPass +{ + // System management + uint32_t create_system(uint32_t count); + bool destroy_system(uint32_t id); + bool resize_system(uint32_t id, uint32_t new_count); + + std::vector& systems(); + const std::vector& systems() const; + + // Pool stats + uint32_t allocated_particles() const; + uint32_t free_particles() const; + + // Texture preloading + void preload_vfx_texture(const std::string& assetName); + void preload_needed_textures(); +}; +``` + +### GameAPI::Engine Particle Methods + +```cpp +// System creation/destruction +uint32_t create_particle_system(uint32_t particle_count); +bool destroy_particle_system(uint32_t system_id); + +// System control +void set_particle_system(uint32_t system_id, const ParticleSystem& sys); +ParticleSystem get_particle_system(uint32_t system_id) const; +std::vector get_particle_systems() const; + +// Pool stats +uint32_t get_particle_pool_allocated() const; +uint32_t get_particle_pool_free() const; + +// Texture preloading +void preload_particle_texture(const std::string& asset_path); +``` + +## See Also + +- `docs/RenderGraph.md` — Render graph integration details +- `docs/RenderPasses.md` — Pass execution and pipeline management +- `docs/GameAPI.md` — High-level game API +- `docs/TextureLoading.md` — Asset loading and streaming diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..ecfe0ff --- /dev/null +++ b/docs/README.md @@ 
-0,0 +1,189 @@ +# Vulkan Engine Documentation + +Welcome to the Vulkan Engine documentation. This engine is a modern, high-performance rendering engine built with Vulkan, featuring a deferred PBR pipeline, GPU-driven systems, and comprehensive tooling for game development. + +## Quick Start + +- **[BUILD.md](BUILD.md)** — Build instructions, dependencies, and platform-specific setup +- **[RUNTIME.md](RUNTIME.md)** — Runtime architecture and execution flow +- **[TROUBLESHOOTING.md](TROUBLESHOOTING.md)** — Common issues and solutions + +## Core Architecture + +### Engine Foundation + +- **[EngineContext.md](EngineContext.md)** — Central dependency injection container and per-frame state +- **[FrameResources.md](FrameResources.md)** — Frame-in-flight synchronization and resource management +- **[ResourceManager.md](ResourceManager.md)** — VMA-based GPU memory allocation and resource lifecycle +- **[FloatingOrigin.md](FloatingOrigin.md)** — Large-world support with double-precision coordinates + +### Rendering + +- **[RenderGraph.md](RenderGraph.md)** — DAG-based render pass scheduling with automatic barriers +- **[RenderPasses.md](RenderPasses.md)** — Built-in passes: geometry, lighting, SSR, volumetrics, particles, tonemap, FXAA +- **[PipelineManager.md](PipelineManager.md)** — Graphics/compute pipeline creation and hot-reloading +- **[Descriptors.md](Descriptors.md)** — Descriptor set management and binding strategies +- **[SHADERS.md](SHADERS.md)** — Shader compilation, includes, and conventions + +### Advanced Rendering Features + +- **[MultiLighting.md](MultiLighting.md)** — Deferred lighting with point/spot lights and IBL +- **[IBL.md](IBL.md)** — Image-based lighting and local reflection probes +- **[RayTracing.md](RayTracing.md)** — Ray-traced shadows and reflections with hybrid modes +- **[ParticleSystem.md](ParticleSystem.md)** — GPU particle simulation (128K particles, flipbook, soft particles) +- **[Volumetrics.md](Volumetrics.md)** — Voxel-based clouds, 
smoke, and flame with raymarching +- **[materials.md](materials.md)** — PBR material system and texture bindings + +### Scene Management + +- **[Scene.md](Scene.md)** — Scene graph, node hierarchy, and draw context +- **[ASSETS.md](ASSETS.md)** — Asset management overview +- **[asset_manager.md](asset_manager.md)** — AssetManager API and async loading +- **[TextureLoading.md](TextureLoading.md)** — Texture streaming, VRAM budgeting, and KTX2 support +- **[Picking.md](Picking.md)** — BVH-based object picking and selection + +### UI and Input + +- **[ImGuiSystem.md](ImGuiSystem.md)** — ImGui integration and debug UI +- **[InputSystem.md](InputSystem.md)** — Keyboard, mouse, and cursor handling + +### Compute and Effects + +- **[Compute.md](Compute.md)** — Compute pipeline creation and dispatch + +### Game Development API + +- **[GameAPI.md](GameAPI.md)** — High-level game-facing API (textures, lighting, picking, particles, volumetrics) +- **[debug_draw_api_examples.md](debug_draw_api_examples.md)** — Debug drawing examples (lines, spheres, AABBs, etc.) 
+ +## Documentation Organization + +### By System + +**Core Systems:** +- Engine: [EngineContext.md](EngineContext.md), [FrameResources.md](FrameResources.md), [ResourceManager.md](ResourceManager.md) +- Rendering: [RenderGraph.md](RenderGraph.md), [RenderPasses.md](RenderPasses.md), [PipelineManager.md](PipelineManager.md) +- Scene: [Scene.md](Scene.md), [asset_manager.md](asset_manager.md), [TextureLoading.md](TextureLoading.md) + +**Rendering Features:** +- Lighting: [MultiLighting.md](MultiLighting.md), [IBL.md](IBL.md) +- Effects: [ParticleSystem.md](ParticleSystem.md), [Volumetrics.md](Volumetrics.md) +- Post-processing: [RenderPasses.md](RenderPasses.md) (SSR, Tonemap, FXAA sections) +- Ray Tracing: [RayTracing.md](RayTracing.md) + +**Developer Tools:** +- Debugging: [debug_draw_api_examples.md](debug_draw_api_examples.md), [ImGuiSystem.md](ImGuiSystem.md) +- Input: [InputSystem.md](InputSystem.md), [Picking.md](Picking.md) + +### By Task + +**Setting up the engine:** +1. [BUILD.md](BUILD.md) — Build and dependencies +2. [RUNTIME.md](RUNTIME.md) — Understanding the runtime loop +3. [EngineContext.md](EngineContext.md) — Core architecture +4. [GameAPI.md](GameAPI.md) — High-level API + +**Creating content:** +1. [ASSETS.md](ASSETS.md) — Asset pipeline overview +2. [TextureLoading.md](TextureLoading.md) — Loading textures +3. [Scene.md](Scene.md) — Adding objects to the scene +4. [materials.md](materials.md) — Material setup + +**Adding effects:** +1. [MultiLighting.md](MultiLighting.md) — Point/spot lights +2. [ParticleSystem.md](ParticleSystem.md) — Particles (fire, smoke, sparks) +3. [Volumetrics.md](Volumetrics.md) — Clouds and atmospheric effects +4. [IBL.md](IBL.md) — Environment lighting + +**Debugging and visualization:** +1. [debug_draw_api_examples.md](debug_draw_api_examples.md) — Debug primitives +2. [ImGuiSystem.md](ImGuiSystem.md) — Debug UI +3. [Picking.md](Picking.md) — Object selection + +**Optimizing performance:** +1. 
[TextureLoading.md](TextureLoading.md) — VRAM budgeting +2. [RenderGraph.md](RenderGraph.md) — Render pass optimization +3. [FrameResources.md](FrameResources.md) — Frame synchronization + +**Writing shaders:** +1. [SHADERS.md](SHADERS.md) — Shader conventions +2. [Descriptors.md](Descriptors.md) — Descriptor bindings +3. [RenderPasses.md](RenderPasses.md) — Custom passes + +**Advanced topics:** +1. [RayTracing.md](RayTracing.md) — Hardware ray tracing +2. [FloatingOrigin.md](FloatingOrigin.md) — Large worlds +3. [Compute.md](Compute.md) — GPU compute + +## Rendering Pipeline Overview + +The engine uses a deferred PBR pipeline with the following stages: + +1. **Background** — Sky/gradient generation (compute) +2. **Geometry** — G-Buffer pass (position, normal, albedo, AO/emissive) +3. **Shadows** — Cascaded shadow maps (4 cascades, optional RT) +4. **Lighting** — Deferred PBR lighting (point/spot/directional, IBL) +5. **SSR** — Screen-space reflections (optional RT fallback) +6. **Volumetrics** — Voxel clouds/smoke/flame (up to 4 volumes) +7. **Particles** — GPU particle systems (up to 128K particles) +8. **Tonemap + Bloom** — HDR → LDR conversion +9. **FXAA** — Anti-aliasing +10. **Transparent** — Forward rendering for transparent objects +11. **DebugDraw** — Debug visualization +12. **ImGui** — UI overlay +13. **Present** — Swapchain presentation + +See [RenderPasses.md](RenderPasses.md) for details. 
+ +## Key Features + +- **Modern Vulkan API** — Dynamic rendering, synchronization2, ray query +- **Deferred PBR Pipeline** — Physically-based materials with IBL +- **GPU-Driven Systems** — Particles and volumetrics fully GPU-simulated +- **Render Graph** — Automatic barrier insertion and resource management +- **Ray Tracing** — Hybrid shadows and reflections (optional) +- **Texture Streaming** — VRAM budgeting with LRU eviction +- **Floating-Origin** — Double-precision world coordinates for large worlds +- **Hot-Reload** — Shader recompilation without restart +- **Debug Tools** — Immediate-mode debug drawing and ImGui integration + +## Architecture Highlights + +### Rendering + +- **Render Graph** ([RenderGraph.md](RenderGraph.md)): DAG-based execution with automatic resource transitions +- **Pipeline Manager** ([PipelineManager.md](PipelineManager.md)): Hot-reloadable shaders and compute pipelines +- **Multi-Lighting** ([MultiLighting.md](MultiLighting.md)): Clustered forward+ deferred hybrid + +### GPU-Driven Effects + +- **Particles** ([ParticleSystem.md](ParticleSystem.md)): 128K particle global pool, compute-based simulation, block-level depth sorting +- **Volumetrics** ([Volumetrics.md](Volumetrics.md)): Semi-Lagrangian advection, procedural noise injection, raymarch composite + +### Asset Pipeline + +- **Async Loading** ([asset_manager.md](asset_manager.md)): Background thread pool with priority queuing +- **Texture Streaming** ([TextureLoading.md](TextureLoading.md)): Automatic VRAM management with upload budgeting +- **KTX2 Support**: Compressed texture formats (BC7, ASTC) with mipmaps + +### Developer Experience + +- **GameAPI** ([GameAPI.md](GameAPI.md)): Stable, high-level C++ API abstracting Vulkan details +- **Debug Drawing** ([debug_draw_api_examples.md](debug_draw_api_examples.md)): Immediate-mode primitives with depth testing +- **ImGui Integration** ([ImGuiSystem.md](ImGuiSystem.md)): Full engine UI with live parameter editing + +## Contributing 
+ +When adding new features: + +1. Update relevant documentation in `docs/` +2. Add examples to [GameAPI.md](GameAPI.md) if exposing new API +3. Include shader documentation in [SHADERS.md](SHADERS.md) for new shaders + +## Getting Help + +- **Build issues**: [BUILD.md](BUILD.md), [TROUBLESHOOTING.md](TROUBLESHOOTING.md) +- **Runtime errors**: [RUNTIME.md](RUNTIME.md), [EngineContext.md](EngineContext.md) +- **Performance**: [TextureLoading.md](TextureLoading.md), [RenderGraph.md](RenderGraph.md) +- **Usage questions**: [GameAPI.md](GameAPI.md), [debug_draw_api_examples.md](debug_draw_api_examples.md) + diff --git a/docs/Volumetrics.md b/docs/Volumetrics.md new file mode 100644 index 0000000..6750fe9 --- /dev/null +++ b/docs/Volumetrics.md @@ -0,0 +1,487 @@ +# Volumetric Cloud System + +The volumetric system provides GPU-accelerated voxel-based rendering for clouds, smoke, and flame effects using raymarching and procedural density simulation. + +## Architecture Overview + +The system is implemented across multiple components: + +- **CloudPass** (`src/render/passes/clouds.h/.cpp`) — Render pass managing voxel volumes, compute simulation, and raymarching +- **GameAPI** (`src/core/game_api.h`) — High-level API for configuring volumetric effects +- **Shaders** + - `shaders/cloud_voxel_advect.comp` — Voxel density simulation (advection + injection) + - `shaders/clouds.frag` — Raymarching fragment shader + +## Key Features + +- **Voxel-based density**: Cubic grids (4³–256³ resolution) storing per-voxel density values +- **Three volume types**: Clouds (infinite XZ wrap), Smoke (localized), Flame (emissive) +- **GPU simulation**: Semi-Lagrangian advection with procedural noise injection +- **Raymarching composite**: Beer-Lambert absorption + single-scattering approximation +- **Ping-pong buffers**: Double-buffered voxel grids for temporal stability +- **Camera following**: Volumes can anchor to camera XZ (infinite clouds) or drift in world-space +- **Floating-origin
stable**: Automatically adjusts volume positions when world origin shifts +- **Multi-volume support**: Up to 4 independent volumes (`MAX_VOXEL_VOLUMES = 4`) + +## Volume Types + +### Clouds (Type 0) + +- **Behavior**: Continuous XZ wrapping for infinite cloud layers +- **Injection**: Broad slab with height-based shaping (upper/lower bounds) +- **Advection**: Wind wraps in XZ, clamped in Y +- **Typical use**: Sky clouds, atmospheric layers + +### Smoke (Type 1) + +- **Behavior**: Localized emission with soft dissipation +- **Injection**: Spherical emitter in UVW space with softer noise threshold +- **Advection**: Fully clamped (no wrapping) +- **Typical use**: Smoke columns, steam, fog banks + +### Flame (Type 2) + +- **Behavior**: Flickering emissive source with strong noise +- **Injection**: Spiky procedural noise, blends toward injected field (avoids fog accumulation) +- **Advection**: Fully clamped (no wrapping) +- **Rendering**: Adds emission term (`emissionColor × emissionStrength`) +- **Typical use**: Fires, torches, explosions + +## Creating Volumetric Effects + +### Via GameAPI + +```cpp +#include "core/game_api.h" + +GameAPI::Engine api(&engine); + +// Enable volumetrics globally +api.set_volumetrics_enabled(true); + +// Configure a cloud volume (index 0) +GameAPI::VoxelVolumeSettings cloud; +cloud.enabled = true; +cloud.type = GameAPI::VoxelVolumeType::Clouds; + +// Position: follow camera XZ, offset in Y +cloud.followCameraXZ = true; +cloud.volumeCenterLocal = glm::vec3(0.0f, 50.0f, 0.0f); // 50 units above camera +cloud.volumeHalfExtents = glm::vec3(100.0f, 20.0f, 100.0f); // 200×40×200 box + +// Animation: enable voxel advection +cloud.animateVoxels = true; +cloud.windVelocityLocal = glm::vec3(5.0f, 2.0f, 0.0f); // Drift +X, rise +Y +cloud.dissipation = 0.5f; // Slow decay +cloud.noiseStrength = 0.8f; +cloud.noiseScale = 8.0f; +cloud.noiseSpeed = 0.3f; + +// Rendering +cloud.densityScale = 1.5f; +cloud.coverage = 0.3f; // Higher = less dense 
(threshold) +cloud.extinction = 1.0f; +cloud.stepCount = 64; // Raymarch steps (quality vs performance) +cloud.gridResolution = 64; // 64³ voxel grid + +// Shading +cloud.albedo = glm::vec3(1.0f, 1.0f, 1.0f); // White clouds +cloud.scatterStrength = 1.2f; +cloud.emissionColor = glm::vec3(0.0f); // No emission +cloud.emissionStrength = 0.0f; + +api.set_voxel_volume(0, cloud); +``` + +### Flame Effect + +```cpp +GameAPI::VoxelVolumeSettings flame; +flame.enabled = true; +flame.type = GameAPI::VoxelVolumeType::Flame; + +// Position: absolute world location +flame.followCameraXZ = false; +flame.volumeCenterLocal = glm::vec3(0.0f, 1.0f, 0.0f); +flame.volumeHalfExtents = glm::vec3(1.0f, 2.0f, 1.0f); // 2×4×2 box + +// Animation +flame.animateVoxels = true; +flame.windVelocityLocal = glm::vec3(0.0f, 8.0f, 0.0f); // Rise upward +flame.dissipation = 2.0f; // Fast decay +flame.noiseStrength = 1.5f; +flame.noiseScale = 10.0f; +flame.noiseSpeed = 2.0f; + +// Emitter in UVW space (bottom center) +flame.emitterUVW = glm::vec3(0.5f, 0.05f, 0.5f); +flame.emitterRadius = 0.2f; // 20% of volume size + +// Shading +flame.densityScale = 2.0f; +flame.coverage = 0.0f; +flame.extinction = 0.8f; +flame.stepCount = 48; +flame.gridResolution = 48; + +flame.albedo = glm::vec3(1.0f, 0.6f, 0.2f); // Orange scatter +flame.scatterStrength = 0.5f; +flame.emissionColor = glm::vec3(1.0f, 0.5f, 0.1f); // Orange-red glow +flame.emissionStrength = 3.0f; // Strong emission + +api.set_voxel_volume(1, flame); +``` + +## Simulation Details + +### Voxel Advection (Compute Shader) + +The `cloud_voxel_advect.comp` shader updates voxel density each frame: + +1. **Semi-Lagrangian advection**: Backtrace along wind velocity + ```glsl + vec3 back = uvw - (windVelocityLocal / volumeSize) * dt; + ``` + - Clouds: Wrap XZ (`fract(back.xz)`), clamp Y + - Smoke/Flame: Clamp all axes + +2. 
**Trilinear sampling**: Sample input density at backtraced position + ```glsl + float advected = sample_density_trilinear(back, gridResolution); + ``` + +3. **Dissipation**: Exponential decay + ```glsl + advected *= exp(-dissipation * dt); + ``` + +4. **Noise injection**: Procedural density injection using 4-octave FBM + - **Clouds**: Broad slab with height shaping + ```glsl + injected = smoothstep(0.55, 0.80, fbm3(uvw * noiseScale + time * noiseSpeed)); + low = smoothstep(0.0, 0.18, uvw.y); + high = 1.0 - smoothstep(0.78, 1.0, uvw.y); + injected *= low * high; + ``` + - **Smoke**: Spherical emitter with softer threshold + ```glsl + shape = 1.0 - smoothstep(emitterRadius, emitterRadius * 1.25, distance(uvw, emitterUVW)); + injected = smoothstep(0.45, 0.75, fbm3(...)) * shape; + ``` + - **Flame**: Spiky noise with flickering + ```glsl + injected = (fbm3(...) ^ 2) * shape; + out_density = mix(advected, injected, noiseStrength * dt); // Blend toward injected + ``` + +5. **Write output**: Write to ping-pong buffer + ```glsl + vox_out.density[idx3(c, gridResolution)] = clamp(out_density, 0.0, 1.0); + ``` + +### Raymarching (Fragment Shader) + +The `clouds.frag` shader composites volumes onto the HDR buffer: + +1. **Ray setup**: + - Reconstruct world-space ray from screen UV + - Define AABB from `volumeCenterLocal ± volumeHalfExtents` + - Compute ray-AABB intersection (`t0`, `t1`) + +2. **Geometry clipping**: + - Sample G-buffer position (`posTex`) + - If opaque geometry exists, clamp `t1` to surface distance + - Prevents cloud density behind solid objects from being composited over them + +3.
**Raymarching loop**: + ```glsl + float transmittance = 1.0; + vec3 scattering = vec3(0.0); + + for (int i = 0; i < stepCount; ++i) { + vec3 p = camPos + rd * t; + float density = sample_voxel_density(p, bmin, bmax); + + // Apply coverage threshold + density = max(density - coverage, 0.0) * densityScale; + + // Beer-Lambert absorption + float extinction_coeff = density * extinction; + float step_transmittance = exp(-extinction_coeff * dt); + + // In-scattering (single-scattering approximation) + vec3 light_contrib = albedo * scatterStrength * density; + + // Flame emission + if (volumeType == 2) { + light_contrib += emissionColor * emissionStrength * density; + } + + scattering += transmittance * (1.0 - step_transmittance) * light_contrib; + transmittance *= step_transmittance; + + t += dt; + } + ``` + +4. **Composite**: + ```glsl + vec3 finalColor = baseColor * transmittance + scattering; + outColor = vec4(finalColor, 1.0); + ``` + +### Floating-Origin Stability + +When the world origin shifts (`CloudPass::update_time_and_origin_delta()`): +- Volumes with `followCameraXZ = false` are adjusted: `volumeCenterLocal -= origin_delta` +- Ensures volumes stay in the same world-space location despite coordinate changes + +### Volume Drift + +For non-camera-following volumes: +```cpp +volumeCenterLocal += volumeVelocityLocal * dt; +``` +Allows volumes to drift independently (e.g., moving storm clouds). 
+ +## Memory Management + +### Voxel Buffers + +Each volume maintains two ping-pong buffers (`voxelDensity[2]`): +- **Read buffer**: Input to advection compute shader and raymarch fragment shader +- **Write buffer**: Output of advection compute shader +- Buffers swap each frame (`voxelReadIndex = 1 - voxelReadIndex`) + +Buffer size: `gridResolution³ × sizeof(float)` bytes +- Example: 64³ grid = 1 MB per buffer (2 MB total per volume) +- Maximum 4 volumes = 8 MB total (at 64³ resolution) + +### Lazy Allocation + +Voxel buffers are allocated only when: +- `enabled = true` +- `gridResolution` changes +- Called via `rebuild_voxel_density()` + +Initial density is procedurally generated using the same FBM noise as injection. + +## Render Graph Integration + +The cloud pass registers after lighting/SSR: + +```cpp +RGImageHandle CloudPass::register_graph(RenderGraph* graph, + RGImageHandle hdrInput, + RGImageHandle gbufPos) +{ + // For each enabled volume: + // 1. Optional: Add compute pass for voxel advection (if animateVoxels == true) + // 2. Add graphics pass for raymarching composite + + // Passes read/write ping-pong buffers and sample G-buffer depth + // Returns final HDR image with clouds composited +} +``` + +**Pass structure** (per volume): +1. **VoxelUpdate** (compute, optional): Read voxel buffer → advect → write voxel buffer +2. **Volumetrics** (graphics): Read HDR input + G-buffer + voxel buffer → raymarch → write HDR output + +Volumes are rendered sequentially (volume 0 → 1 → 2 → 3) to allow layered effects. 
+ +## Performance Considerations + +- **Voxel resolution**: Higher resolution = better detail but 8× memory per doubling (64³ = 1 MB, 128³ = 8 MB) +- **Raymarch steps**: More steps = smoother results but linear fragment cost (48-128 typical) +- **Fill rate**: Volumetrics are fragment-shader intensive; reduce `stepCount` on low-end hardware +- **Advection cost**: Compute cost is `O(resolution³)` but typically <1ms for 64³ +- **Multi-volume overhead**: Each active volume adds a full raymarch pass; budget 2-3 volumes max + +### Recommended Settings + +**High quality (desktop)**: +```cpp +gridResolution = 128; +stepCount = 128; +``` + +**Medium quality (mid-range)**: +```cpp +gridResolution = 64; +stepCount = 64; +``` + +**Low quality (mobile/low-end)**: +```cpp +gridResolution = 32; +stepCount = 32; +``` + +## Parameter Reference + +### VoxelVolumeSettings + +```cpp +struct VoxelVolumeSettings +{ + // Enable/type + bool enabled{false}; + VoxelVolumeType type{Clouds}; // Clouds, Smoke, Flame + + // Positioning + bool followCameraXZ{false}; // Anchor to camera XZ + bool animateVoxels{true}; // Enable voxel simulation + glm::vec3 volumeCenterLocal{0,2,0}; + glm::vec3 volumeHalfExtents{8,8,8}; + glm::vec3 volumeVelocityLocal{0}; // Drift velocity (if !followCameraXZ) + + // Rendering + float densityScale{1.0}; // Density multiplier + float coverage{0.0}; // 0..1 threshold (higher = less dense) + float extinction{1.0}; // Absorption coefficient + int stepCount{48}; // Raymarch steps (8-256) + uint32_t gridResolution{48}; // Voxel grid resolution (4-256) + + // Simulation (advection) + glm::vec3 windVelocityLocal{0,2,0}; // Wind velocity (units/sec) + float dissipation{1.25}; // Density decay (1/sec) + float noiseStrength{1.0}; // Injection rate + float noiseScale{8.0}; // Noise frequency + float noiseSpeed{1.0}; // Time scale + + // Emitter (smoke/flame only) + glm::vec3 emitterUVW{0.5,0.05,0.5}; // Normalized (0..1) + float emitterRadius{0.18}; // Normalized (0..1) + + // 
Shading + glm::vec3 albedo{1,1,1}; // Scattering tint + float scatterStrength{1.0}; + glm::vec3 emissionColor{1,0.6,0.25};// Flame emission tint + float emissionStrength{0.0}; // Flame emission strength +}; +``` + +## Common Presets + +### Stratocumulus Clouds + +```cpp +cloud.type = Clouds; +cloud.followCameraXZ = true; +cloud.volumeCenterLocal = glm::vec3(0, 80, 0); +cloud.volumeHalfExtents = glm::vec3(200, 30, 200); +cloud.windVelocityLocal = glm::vec3(3, 1, 0); +cloud.dissipation = 0.3f; +cloud.densityScale = 1.2f; +cloud.coverage = 0.4f; +cloud.gridResolution = 64; +cloud.stepCount = 64; +``` + +### Torch Flame + +```cpp +flame.type = Flame; +flame.followCameraXZ = false; +flame.volumeCenterLocal = glm::vec3(0, 1.5, 0); +flame.volumeHalfExtents = glm::vec3(0.3, 0.8, 0.3); +flame.windVelocityLocal = glm::vec3(0, 6, 0); +flame.dissipation = 2.5f; +flame.noiseStrength = 2.0f; +flame.emitterUVW = glm::vec3(0.5, 0.1, 0.5); +flame.emitterRadius = 0.25f; +flame.emissionColor = glm::vec3(1.0, 0.4, 0.1); +flame.emissionStrength = 4.0f; +flame.gridResolution = 32; +flame.stepCount = 32; +``` + +### Smoke Plume + +```cpp +smoke.type = Smoke; +smoke.followCameraXZ = false; +smoke.volumeCenterLocal = glm::vec3(0, 2, 0); +smoke.volumeHalfExtents = glm::vec3(2, 5, 2); +smoke.windVelocityLocal = glm::vec3(1, 4, 0); +smoke.dissipation = 1.0f; +smoke.noiseStrength = 1.2f; +smoke.emitterUVW = glm::vec3(0.5, 0.05, 0.5); +smoke.emitterRadius = 0.15f; +smoke.albedo = glm::vec3(0.4, 0.4, 0.4); +smoke.scatterStrength = 0.8f; +smoke.gridResolution = 48; +smoke.stepCount = 48; +``` + +## Troubleshooting + +**Volumes not visible**: +- Ensure `enabled = true` and `volumetrics_enabled = true` globally +- Check AABB intersects camera frustum +- Reduce `coverage` (lower = denser) +- Increase `densityScale` + +**Blocky/noisy appearance**: +- Increase `gridResolution` (64 → 128) +- Increase `stepCount` (48 → 96) +- Adjust `noiseScale` for finer detail + +**Performance issues**: +- Reduce 
`gridResolution` (64 → 32) +- Reduce `stepCount` (64 → 32) +- Disable `animateVoxels` for static volumes +- Reduce number of active volumes + +**Volumes don't animate**: +- Ensure `animateVoxels = true` +- Check `windVelocityLocal` is non-zero +- Verify `noiseStrength > 0` and `noiseSpeed > 0` + +**Volumes flicker/pop**: +- Increase `dissipation` to smooth density changes +- Lower `noiseStrength` for subtler injection +- Use higher `gridResolution` for temporal stability + +## API Reference + +### GameAPI::Engine Volumetric Methods + +```cpp +// Global enable/disable +void set_volumetrics_enabled(bool enabled); +bool get_volumetrics_enabled() const; + +// Volume configuration (index 0-3) +void set_voxel_volume(int index, const VoxelVolumeSettings& settings); +VoxelVolumeSettings get_voxel_volume(int index) const; + +// Retrieve all volumes +std::vector<VoxelVolumeSettings> get_voxel_volumes() const; +``` + +### CloudPass + +```cpp +class CloudPass : public IRenderPass +{ + // Render graph registration + RGImageHandle register_graph(RenderGraph* graph, + RGImageHandle hdrInput, + RGImageHandle gbufPos); + + // Internal voxel management + void rebuild_voxel_density(uint32_t volume_index, + uint32_t resolution, + const VoxelVolumeSettings& settings); +}; +``` + +## See Also + +- `docs/ParticleSystem.md` — GPU particle system documentation +- `docs/RenderGraph.md` — Render graph integration details +- `docs/RenderPasses.md` — Pass execution and pipeline management +- `docs/GameAPI.md` — High-level game API +- `docs/Compute.md` — Compute pipeline details diff --git a/shaders/background_env.frag b/shaders/background_env.frag index fc0e2b3..079f6a2 100644 --- a/shaders/background_env.frag +++ b/shaders/background_env.frag @@ -10,10 +10,11 @@ void main() { // Reconstruct world-space direction from screen UV vec2 ndc = inUV * 2.0 - 1.0; // [-1,1] - vec4 clip = vec4(ndc, 1.0, 1.0); - vec4 vpos = inverse(sceneData.proj) * clip; - vec3 viewDir = normalize(vpos.xyz / max(vpos.w, 1e-6)); - vec3
worldDir = normalize((inverse(sceneData.view) * vec4(viewDir, 0.0)).xyz); + + // Avoid per-pixel matrix inverses. With a perspective projection, a view-space ray can be + // reconstructed directly from the projection diagonal and then rotated to world space. + vec3 viewDir = normalize(vec3(ndc.x / sceneData.proj[0][0], ndc.y / sceneData.proj[1][1], -1.0)); + vec3 worldDir = normalize(transpose(mat3(sceneData.view)) * viewDir); vec2 uv = dir_to_equirect(worldDir); // Sample a dedicated background environment map when available. diff --git a/shaders/deferred_lighting.frag b/shaders/deferred_lighting.frag index fbb3b36..726d911 100644 --- a/shaders/deferred_lighting.frag +++ b/shaders/deferred_lighting.frag @@ -35,6 +35,17 @@ const float SHADOW_MIN_BIAS = 1e-5; const float SHADOW_RAY_TMIN = 0.02;// start a bit away from the surface const float SHADOW_RAY_ORIGIN_BIAS = 0.01;// world units +vec3 getCameraWorldPosition() +{ + // view = [ R^T -R^T*C ] + // [ 0 1 ] + // => C = -R * T, where T is view[3].xyz and R = transpose(mat3(view)) + mat3 rotT = mat3(sceneData.view); + mat3 rot = transpose(rotT); + vec3 T = sceneData.view[3].xyz; + return -rot * T; +} + float hash12(vec2 p) { vec3 p3 = fract(vec3(p.xyx) * 0.1031); @@ -290,7 +301,7 @@ void main(){ float ao = extraSample.x; vec3 emissive = extraSample.yzw; - vec3 camPos = vec3(inverse(sceneData.view)[3]); + vec3 camPos = getCameraWorldPosition(); vec3 V = normalize(camPos - pos); // Directional sun term using evaluate_brdf + cascaded shadowing diff --git a/shaders/deferred_lighting_nort.frag b/shaders/deferred_lighting_nort.frag index e4412b3..80e61f3 100644 --- a/shaders/deferred_lighting_nort.frag +++ b/shaders/deferred_lighting_nort.frag @@ -27,6 +27,17 @@ const float SHADOW_RPDB_SCALE = 1.0; // Minimum clamp to keep a tiny bias even on perpendicular receivers const float SHADOW_MIN_BIAS = 1e-5; +vec3 getCameraWorldPosition() +{ + // view = [ R^T -R^T*C ] + // [ 0 1 ] + // => C = -R * T, where T is view[3].xyz and R 
= transpose(mat3(view)) + mat3 rotT = mat3(sceneData.view); + mat3 rot = transpose(rotT); + vec3 T = sceneData.view[3].xyz; + return -rot * T; +} + float hash12(vec2 p) { vec3 p3 = fract(vec3(p.xyx) * 0.1031); @@ -219,7 +230,7 @@ void main(){ float ao = extraSample.x; vec3 emissive = extraSample.yzw; - vec3 camPos = vec3(inverse(sceneData.view)[3]); + vec3 camPos = getCameraWorldPosition(); vec3 V = normalize(camPos - pos); // Directional sun term using evaluate_brdf + cascaded shadowing diff --git a/shaders/gbuffer.frag b/shaders/gbuffer.frag index 2dcf9a8..2ef736e 100644 --- a/shaders/gbuffer.frag +++ b/shaders/gbuffer.frag @@ -30,6 +30,7 @@ layout(buffer_reference, std430) readonly buffer VertexBuffer{ layout(push_constant) uniform constants { mat4 render_matrix; + mat3 normal_matrix; VertexBuffer vertexBuffer; uint objectID; } PushConstants; diff --git a/shaders/mesh.frag b/shaders/mesh.frag index 6d7bb18..59d300a 100644 --- a/shaders/mesh.frag +++ b/shaders/mesh.frag @@ -13,6 +13,17 @@ layout (location = 4) in vec4 inTangent; layout (location = 0) out vec4 outFragColor; +vec3 getCameraWorldPosition() +{ + // view = [ R^T -R^T*C ] + // [ 0 1 ] + // => C = -R * T, where T is view[3].xyz and R = transpose(mat3(view)) + mat3 rotT = mat3(sceneData.view); + mat3 rot = transpose(rotT); + vec3 T = sceneData.view[3].xyz; + return -rot * T; +} + void main() { // Base color with material factor and texture @@ -43,7 +54,7 @@ void main() vec3 T = normalize(inTangent.xyz); vec3 B = normalize(cross(Nn, T)) * inTangent.w; vec3 N = normalize(T * Nm.x + B * Nm.y + Nn * Nm.z); - vec3 camPos = vec3(inverse(sceneData.view)[3]); + vec3 camPos = getCameraWorldPosition(); vec3 V = normalize(camPos - inWorldPos); // Directional sun term (no shadows in forward path) diff --git a/shaders/mesh.vert b/shaders/mesh.vert index fee5ffa..4a71640 100644 --- a/shaders/mesh.vert +++ b/shaders/mesh.vert @@ -29,6 +29,7 @@ layout(buffer_reference, std430) readonly buffer VertexBuffer{ 
layout(push_constant) uniform constants { mat4 render_matrix; + mat3 normal_matrix; VertexBuffer vertexBuffer; uint objectID; } PushConstants; @@ -37,8 +38,7 @@ void main() { Vertex v = PushConstants.vertexBuffer.vertices[gl_VertexIndex]; - mat3 M = mat3(PushConstants.render_matrix); - mat3 normalMatrix = transpose(inverse(M)); + mat3 normalMatrix = PushConstants.normal_matrix; vec4 worldPos = PushConstants.render_matrix * vec4(v.position, 1.0); gl_Position = sceneData.viewproj * worldPos; @@ -52,4 +52,3 @@ void main() outUV = vec2(v.uv_x, v.uv_y); outWorldPos = worldPos.xyz; } - diff --git a/shaders/ssr.frag b/shaders/ssr.frag index 6b556fd..b2c5d7d 100644 --- a/shaders/ssr.frag +++ b/shaders/ssr.frag @@ -20,9 +20,9 @@ vec3 getCameraWorldPosition() return -rot * T; // C = -R * T } -vec3 projectToScreen(vec3 worldPos) +vec3 projectToScreenFromView(vec3 viewPos) { - vec4 clip = sceneData.viewproj * vec4(worldPos, 1.0); + vec4 clip = sceneData.proj * vec4(viewPos, 1.0); if (clip.w <= 0.0) return vec3(0.0, 0.0, -1.0); @@ -64,6 +64,8 @@ void main() vec3 camPos = getCameraWorldPosition(); vec3 V = normalize(camPos - worldPos); vec3 R = reflect(-V, N); + vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz; + vec3 viewDir = normalize((sceneData.view * vec4(R, 0.0)).xyz); float gloss = 1.0 - roughness; float F0 = mix(0.04, 1.0, metallic); @@ -87,9 +89,9 @@ void main() float t = STEP_LENGTH; for (int i = 0; i < maxSteps && t <= MAX_DISTANCE; ++i, t += STEP_LENGTH) { - vec3 samplePos = worldPos + R * t; + vec3 sampleViewPos = viewPos + viewDir * t; - vec3 proj = projectToScreen(samplePos); + vec3 proj = projectToScreenFromView(sampleViewPos); if (proj.z < 0.0) { break; @@ -102,10 +104,9 @@ void main() continue; } - vec3 viewSample = (sceneData.view * vec4(samplePos, 1.0)).xyz; vec3 viewScene = (sceneData.view * vec4(scenePosSample.xyz, 1.0)).xyz; - float depthRay = -viewSample.z; + float depthRay = -sampleViewPos.z; float depthScene = -viewScene.z; float depthDiff = 
depthRay - depthScene; diff --git a/shaders/ssr_rt.frag b/shaders/ssr_rt.frag index 5386781..cb66428 100644 --- a/shaders/ssr_rt.frag +++ b/shaders/ssr_rt.frag @@ -25,9 +25,9 @@ vec3 getCameraWorldPosition() return -rot * T; // C = -R * T } -vec3 projectToScreen(vec3 worldPos) +vec3 projectToScreenFromView(vec3 viewPos) { - vec4 clip = sceneData.viewproj * vec4(worldPos, 1.0); + vec4 clip = sceneData.proj * vec4(viewPos, 1.0); if (clip.w <= 0.0) { @@ -71,6 +71,8 @@ void main() vec3 camPos = getCameraWorldPosition(); vec3 V = normalize(camPos - worldPos); vec3 R = reflect(-V, N); + vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz; + vec3 viewDir = normalize((sceneData.view * vec4(R, 0.0)).xyz); float gloss = 1.0 - roughness; float F0 = mix(0.04, 1.0, metallic); @@ -107,9 +109,9 @@ void main() float t = STEP_LENGTH_SSR; for (int i = 0; i < maxSteps && t <= MAX_DISTANCE_SSR; ++i, t += STEP_LENGTH_SSR) { - vec3 samplePos = worldPos + R * t; + vec3 sampleViewPos = viewPos + viewDir * t; - vec3 proj = projectToScreen(samplePos); + vec3 proj = projectToScreenFromView(sampleViewPos); if (proj.z < 0.0) { break; @@ -122,10 +124,9 @@ void main() continue; } - vec3 viewSample = (sceneData.view * vec4(samplePos, 1.0)).xyz; vec3 viewScene = (sceneData.view * vec4(scenePosSample.xyz, 1.0)).xyz; - float depthRay = -viewSample.z; + float depthRay = -sampleViewPos.z; float depthScene = -viewScene.z; float depthDiff = depthRay - depthScene; @@ -190,7 +191,7 @@ void main() float tHit = rayQueryGetIntersectionTEXT(rq, true); vec3 hitPos = origin + R * tHit; - vec3 proj = projectToScreen(hitPos); + vec3 proj = projectToScreenFromView((sceneData.view * vec4(hitPos, 1.0)).xyz); if (proj.z >= 0.0) { vec2 hitUV = proj.xy; diff --git a/shaders/tonemap.frag b/shaders/tonemap.frag index a788fd8..5695b0c 100644 --- a/shaders/tonemap.frag +++ b/shaders/tonemap.frag @@ -31,36 +31,50 @@ vec3 aces_tonemap(vec3 x) return clamp((x*(a*x+b))/(x*(c*x+d)+e), 0.0, 1.0); } +void accum_bloom(vec3 
c, float kernel_weight, inout vec3 bloom, inout float weight_sum) +{ + float bright = max(max(c.r, c.g), c.b) - pc.bloomThreshold; + bright = max(bright, 0.0); + + // Match the old behavior: only normalize over samples that pass the threshold. + float contribute = step(1e-5, bright); + + bloom += c * bright * kernel_weight; + weight_sum += kernel_weight * contribute; +} + void main() { vec3 hdr = texture(uHdr, inUV).rgb; - // Simple bloom in HDR space: gather bright neighbors and add a small blurred contribution. - if (pc.bloomEnabled != 0) + // Simple bloom in HDR space: approximate a 5x5 Gaussian blur using 9 bilinear samples (vs. 25 taps). + if (pc.bloomEnabled != 0 && pc.bloomIntensity > 0.0) { vec2 texel = 1.0 / vec2(textureSize(uHdr, 0)); + vec2 d = texel * 1.2; // Combines 1- and 2-texel taps via linear filtering (4:1 weight). + vec3 bloom = vec3(0.0); - int radius = 2; - int count = 0; - for (int x = -radius; x <= radius; ++x) + float wsum = 0.0; + + // 1D weights [1 4 6 4 1] collapsed to 3 linear samples => weights [5 6 5] + // 2D separable => center 36, axis 30, corners 25 (sum 256). 
+ accum_bloom(hdr, 36.0, bloom, wsum); // reuse center sample + + accum_bloom(texture(uHdr, clamp(inUV + vec2( d.x, 0.0), vec2(0.0), vec2(1.0))).rgb, 30.0, bloom, wsum); + accum_bloom(texture(uHdr, clamp(inUV + vec2(-d.x, 0.0), vec2(0.0), vec2(1.0))).rgb, 30.0, bloom, wsum); + accum_bloom(texture(uHdr, clamp(inUV + vec2(0.0, d.y), vec2(0.0), vec2(1.0))).rgb, 30.0, bloom, wsum); + accum_bloom(texture(uHdr, clamp(inUV + vec2(0.0, -d.y), vec2(0.0), vec2(1.0))).rgb, 30.0, bloom, wsum); + + accum_bloom(texture(uHdr, clamp(inUV + vec2( d.x, d.y), vec2(0.0), vec2(1.0))).rgb, 25.0, bloom, wsum); + accum_bloom(texture(uHdr, clamp(inUV + vec2(-d.x, d.y), vec2(0.0), vec2(1.0))).rgb, 25.0, bloom, wsum); + accum_bloom(texture(uHdr, clamp(inUV + vec2( d.x, -d.y), vec2(0.0), vec2(1.0))).rgb, 25.0, bloom, wsum); + accum_bloom(texture(uHdr, clamp(inUV + vec2(-d.x, -d.y), vec2(0.0), vec2(1.0))).rgb, 25.0, bloom, wsum); + + if (wsum > 0.0) { - for (int y = -radius; y <= radius; ++y) - { - vec2 offset = vec2(x, y) * texel; - vec3 c = texture(uHdr, clamp(inUV + offset, vec2(0.0), vec2(1.0))).rgb; - float bright = max(max(c.r, c.g), c.b) - pc.bloomThreshold; - if (bright > 0.0) - { - bloom += c * bright; - count++; - } - } + bloom /= wsum; + hdr += pc.bloomIntensity * bloom; } - if (count > 0) - { - bloom /= float(count); - } - hdr += pc.bloomIntensity * bloom; } // Simple exposure @@ -77,4 +91,3 @@ void main() outColor = vec4(mapped, 1.0); } - diff --git a/src/core/types.h b/src/core/types.h index 1deaf62..ade8f6b 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -28,6 +28,7 @@ inline const char *string_VkFormat(VkFormat) { return "VkFormat"; } #include #include +#include #include #include #include @@ -194,9 +195,16 @@ struct GPUMeshBuffers { // push constants for our mesh object draws struct GPUDrawPushConstants { glm::mat4 worldMatrix; + // std140-compatible representation of mat3 (3 x vec4 columns; w unused). 
+ glm::mat3x4 normalMatrix; VkDeviceAddress vertexBuffer; uint32_t objectID; }; +static_assert(offsetof(GPUDrawPushConstants, worldMatrix) == 0); +static_assert(offsetof(GPUDrawPushConstants, normalMatrix) == 64); +static_assert(offsetof(GPUDrawPushConstants, vertexBuffer) == 112); +static_assert(offsetof(GPUDrawPushConstants, objectID) == 120); +static_assert(sizeof(GPUDrawPushConstants) == 128); struct DrawContext; diff --git a/src/render/passes/geometry.cpp b/src/render/passes/geometry.cpp index a0ec752..2f83ca2 100644 --- a/src/render/passes/geometry.cpp +++ b/src/render/passes/geometry.cpp @@ -281,6 +281,12 @@ void GeometryPass::draw_geometry(VkCommandBuffer cmd, } GPUDrawPushConstants push_constants{}; push_constants.worldMatrix = r.transform; + { + const glm::mat3 n = glm::transpose(glm::inverse(glm::mat3(r.transform))); + push_constants.normalMatrix[0] = glm::vec4(n[0], 0.0f); + push_constants.normalMatrix[1] = glm::vec4(n[1], 0.0f); + push_constants.normalMatrix[2] = glm::vec4(n[2], 0.0f); + } push_constants.vertexBuffer = r.vertexBufferAddress; push_constants.objectID = r.objectID; diff --git a/src/render/passes/shadow.cpp b/src/render/passes/shadow.cpp index 93ed6d9..21451c1 100644 --- a/src/render/passes/shadow.cpp +++ b/src/render/passes/shadow.cpp @@ -19,6 +19,22 @@ #include "core/types.h" #include "core/config.h" +namespace +{ +struct ShadowPushConstants +{ + glm::mat4 render_matrix; + VkDeviceAddress vertexBuffer; + uint32_t objectID; + uint32_t cascadeIndex; +}; +static_assert(offsetof(ShadowPushConstants, render_matrix) == 0); +static_assert(offsetof(ShadowPushConstants, vertexBuffer) == 64); +static_assert(offsetof(ShadowPushConstants, objectID) == 72); +static_assert(offsetof(ShadowPushConstants, cascadeIndex) == 76); +static_assert(sizeof(ShadowPushConstants) == 80); +} // namespace + void ShadowPass::init(EngineContext *context) { _context = context; @@ -29,10 +45,7 @@ void ShadowPass::init(EngineContext *context) // Keep push constants 
matching current shader layout for now VkPushConstantRange pc{}; pc.offset = 0; - // Push constants layout in shadow.vert is GPUDrawPushConstants + cascade index, rounded to 16 bytes - const uint32_t pcRaw = static_cast<uint32_t>(sizeof(GPUDrawPushConstants) + sizeof(uint32_t)); - const uint32_t pcAligned = (pcRaw + 15u) & ~15u; // 16-byte alignment to match std430 expectations - pc.size = pcAligned; + pc.size = static_cast<uint32_t>(sizeof(ShadowPushConstants)); pc.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; GraphicsPipelineCreateInfo info{}; @@ -180,12 +193,6 @@ void ShadowPass::draw_shadow(VkCommandBuffer cmd, const DrawContext &dc = ctxLocal->getMainDrawContext(); VkBuffer lastIndexBuffer = VK_NULL_HANDLE; - - struct ShadowPC - { - GPUDrawPushConstants draw; - uint32_t cascadeIndex; - }; for (const auto &r : dc.OpaqueSurfaces) { if (r.indexBuffer != lastIndexBuffer) @@ -194,12 +201,12 @@ void ShadowPass::draw_shadow(VkCommandBuffer cmd, vkCmdBindIndexBuffer(cmd, r.indexBuffer, 0, VK_INDEX_TYPE_UINT32); } - ShadowPC spc{}; - spc.draw.worldMatrix = r.transform; - spc.draw.vertexBuffer = r.vertexBufferAddress; - spc.draw.objectID = r.objectID; + ShadowPushConstants spc{}; + spc.render_matrix = r.transform; + spc.vertexBuffer = r.vertexBufferAddress; + spc.objectID = r.objectID; spc.cascadeIndex = cascadeIndex; - vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(ShadowPC), &spc); + vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(ShadowPushConstants), &spc); vkCmdDrawIndexed(cmd, r.indexCount, 1, r.firstIndex, 0, 0); } } diff --git a/src/render/passes/transparent.cpp b/src/render/passes/transparent.cpp index 342e1bd..43af337 100644 --- a/src/render/passes/transparent.cpp +++ b/src/render/passes/transparent.cpp @@ -195,6 +195,12 @@ void TransparentPass::draw_transparent(VkCommandBuffer cmd, } GPUDrawPushConstants push{}; push.worldMatrix = r.transform; + { + const glm::mat3 n = glm::transpose(glm::inverse(glm::mat3(r.transform))); +
push.normalMatrix[0] = glm::vec4(n[0], 0.0f); + push.normalMatrix[1] = glm::vec4(n[1], 0.0f); + push.normalMatrix[2] = glm::vec4(n[2], 0.0f); + } push.vertexBuffer = r.vertexBufferAddress; push.objectID = r.objectID; vkCmdPushConstants(cmd, r.material->pipeline->layout,