ADD: Docs and shader optim

This commit is contained in:
2025-12-25 22:09:02 +09:00
parent 0172996e12
commit d6216b20fc
16 changed files with 1178 additions and 60 deletions

366
docs/ParticleSystem.md Normal file
View File

@@ -0,0 +1,366 @@
# Particle System
The particle system provides GPU-accelerated particle simulation and rendering with support for flipbook animation, soft particles, and alpha/additive blending.
## Architecture Overview
The system is implemented across multiple components:
- **ParticlePass** (`src/render/passes/particles.h/.cpp`) — Render pass managing particle pools, compute pipelines, and graphics pipelines
- **GameAPI** (`src/core/game_api.h`) — High-level API for creating and controlling particle systems
- **Shaders** — Compute and graphics shaders for simulation and rendering
- `shaders/particles_update.comp` — Per-particle physics simulation
- `shaders/particles_sort_blocks.comp` — Block-level depth sorting for alpha blending
- `shaders/particles_build_indices.comp` — Build draw indices from sorted blocks
- `shaders/particles.vert/.frag` — Vertex/fragment shaders for rendering
## Key Features
- **Global particle pool**: Up to 128K particles (`k_max_particles = 128 * 1024`) shared across all systems
- **GPU simulation**: Fully GPU-driven via compute shaders (no CPU readback)
- **Flipbook animation**: Supports sprite sheet animation with configurable atlas layout and FPS
- **Soft particles**: Depth-aware fading near opaque geometry
- **Blend modes**: Additive (fire, sparks) and Alpha (smoke, debris) with automatic depth sorting
- **Noise distortion**: Optional UV distortion for organic motion
- **Floating-origin stable**: Automatically adjusts particle positions when world origin shifts
## Particle Data Layout
Each particle is represented as 64 bytes (4 × vec4) on the GPU:
```glsl
struct Particle
{
vec4 pos_age; // xyz = local position, w = remaining life (seconds)
vec4 vel_life; // xyz = local velocity, w = total lifetime (seconds)
vec4 color; // rgba
vec4 misc; // x=size, y=random seed, z/w=unused
};
```
## Creating Particle Systems
### Via GameAPI
```cpp
#include "core/game_api.h"
GameAPI::Engine api(&engine);
// Create a particle system with 1024 particles
uint32_t systemId = api.create_particle_system(1024);
// Configure parameters
GameAPI::ParticleSystem sys = api.get_particle_system(systemId);
sys.enabled = true;
sys.reset = true; // Respawn all particles immediately
sys.blendMode = GameAPI::ParticleBlendMode::Additive;
// Emitter settings
sys.params.emitterPosLocal = glm::vec3(0.0f, 0.0f, 0.0f);
sys.params.spawnRadius = 0.1f;
sys.params.emitterDirLocal = glm::vec3(0.0f, 1.0f, 0.0f); // Upward
sys.params.coneAngleDegrees = 20.0f;
// Particle properties
sys.params.minSpeed = 2.0f;
sys.params.maxSpeed = 8.0f;
sys.params.minLife = 0.5f;
sys.params.maxLife = 1.5f;
sys.params.minSize = 0.05f;
sys.params.maxSize = 0.15f;
// Physics
sys.params.drag = 1.0f;
sys.params.gravity = 0.0f; // Positive pulls down -Y in local space
// Appearance
sys.params.color = glm::vec4(1.0f, 0.5f, 0.1f, 1.0f); // Orange
// Flipbook animation (16×4 atlas, 30 FPS)
sys.flipbookTexture = "vfx/flame.ktx2";
sys.params.flipbookCols = 16;
sys.params.flipbookRows = 4;
sys.params.flipbookFps = 30.0f;
sys.params.flipbookIntensity = 1.0f;
// Noise distortion
sys.noiseTexture = "vfx/simplex.ktx2";
sys.params.noiseScale = 6.0f;
sys.params.noiseStrength = 0.05f;
sys.params.noiseScroll = glm::vec2(0.0f, 0.0f);
// Soft particles
sys.params.softDepthDistance = 0.15f; // Fade particles within 0.15 units of geometry
api.set_particle_system(systemId, sys);
```
### Direct API
```cpp
ParticlePass* particlePass = /* obtain from RenderPassManager */;
// Create system
uint32_t systemId = particlePass->create_system(1024);
// Access and modify
auto& systems = particlePass->systems();
for (auto& sys : systems)
{
if (sys.id == systemId)
{
sys.enabled = true;
sys.params.color = glm::vec4(1.0f, 0.0f, 0.0f, 1.0f);
break;
}
}
```
## Simulation Details
### Update Pipeline (Compute)
The `particles_update.comp` shader runs once per frame for each active system:
1. **Floating-origin correction**: `p.pos_age.xyz -= origin_delta` keeps particles stable when the world origin shifts
2. **Respawn check**: Particles that are dead (`age <= 0`), or every particle when the reset flag is set, are respawned with randomized properties
3. **Physics integration**:
- Apply gravity: `vel += vec3(0, -gravity, 0) * dt`
- Apply drag: `vel *= exp(-drag * dt)`
- Integrate position: `pos += vel * dt`
4. **Age decrement**: `age -= dt`
Random number generation uses a per-particle seed (`misc.y`) combined with system time to ensure deterministic but varied behavior.
### Cone Emission
`coneAngleDegrees` selects the emission shape; values greater than 0 emit particles within a cone:
- Cone axis is `emitterDirLocal`
- Particles are randomly distributed within the cone solid angle
- `coneAngleDegrees = 0` emits in a single direction
- `coneAngleDegrees < 0` emits in all directions (sphere)
### Spawn Radius
Particles spawn at `emitterPosLocal + random_in_sphere(spawnRadius)`, i.e. at a random point inside a sphere of radius `spawnRadius` centered on the emitter.
## Rendering Pipeline
### Blend Modes
**Additive** (`BlendMode::Additive`):
- Source: `VK_BLEND_FACTOR_SRC_ALPHA`
- Dest: `VK_BLEND_FACTOR_ONE`
- No depth sorting required
- Ideal for fire, sparks, energy effects
**Alpha** (`BlendMode::Alpha`):
- Source: `VK_BLEND_FACTOR_SRC_ALPHA`
- Dest: `VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA`
- Block-level depth sorting (256 particles per block)
- Better for smoke, debris, leaves
### Alpha Sorting
For alpha-blended systems:
1. **Block sorting** (`particles_sort_blocks.comp`): Divides particles into 256-particle blocks, computes average depth per block, sorts blocks back-to-front
2. **Index building** (`particles_build_indices.comp`): Writes sorted particle indices into `_draw_indices` buffer
3. **Rendering**: Vertex shader reads particles via indirection: `Particle p = pool.particles[indices[gl_InstanceIndex]]`
This provides coarse-grained sorting with minimal compute overhead (at most 512 blocks: 128K pool particles / 256 particles per block).
### Soft Particles
The fragment shader samples G-buffer depth (`gbufferPosition.w`) and fades particle alpha where particles intersect opaque geometry:
```glsl
float sceneDepth = texture(posTex, screenUV).w;
float particleDepth = /* compute from world pos */;
float depthDiff = sceneDepth - particleDepth;
float softFactor = smoothstep(0.0, softDepthDistance, depthDiff);
outColor.a *= softFactor;
```
Set `softDepthDistance = 0` to disable.
### Flipbook Animation
The fragment shader samples an animated sprite sheet:
1. Compute frame index: `frameIndex = int(time_sec * flipbookFps) % (flipbookCols * flipbookRows)`
2. Map frame to UV rect: `(col, row) = (frameIndex % cols, frameIndex / cols)`
3. Sample texture: `color = texture(flipbookTex, baseUV * cellSize + cellOffset)`
### Noise Distortion
Optional UV distortion using a noise texture:
```glsl
vec2 noiseUV = uv * noiseScale + noiseScroll * time_sec;
vec2 distortion = (texture(noiseTex, noiseUV).rg - 0.5) * 2.0 * noiseStrength;
vec2 finalUV = uv + distortion;
```
## Memory Management
### Particle Pool Allocation
The global pool is pre-allocated (128K particles × 64 bytes = 8 MB) and subdivided into ranges:
- `create_system(count)`: Allocates a contiguous range from `_free_ranges`
- `destroy_system(id)`: Returns range to free list and merges adjacent ranges
- `resize_system(id, new_count)`: Reallocates (may move particles)
Allocation uses a simple first-fit strategy with automatic coalescing.
### Texture Caching
VFX textures (flipbook/noise) are loaded on-demand and cached in `_vfx_textures`:
- `preload_vfx_texture(assetName)`: Explicitly load texture (safe to call from UI)
- `preload_needed_textures()`: Load all textures referenced by active systems (call before ResourceUploads pass)
- Fallback 1×1 textures (`_fallback_flipbook`, `_fallback_noise`) are used when load fails
## Render Graph Integration
The particle pass registers into the render graph after lighting and SSR:
```cpp
void ParticlePass::register_graph(RenderGraph* graph,
RGImageHandle hdrTarget,
RGImageHandle depthHandle,
RGImageHandle gbufferPosition)
{
graph->add_pass("Particles", RGPassType::Graphics,
[=](RGPassBuilder& b, EngineContext*) {
b.write_color(hdrTarget); // Composite onto HDR
b.read_depth(depthHandle); // Depth test
b.sample_image(gbufferPosition); // Soft particles
},
[this](VkCommandBuffer cmd, const RGPassResources& res, EngineContext* ctx) {
// 1. Run compute update for each system
// 2. For alpha systems: sort blocks + build indices
// 3. Render all systems (additive first, then alpha)
}
);
}
```
## Performance Considerations
- **Particle count**: 128K global limit; budget carefully across systems
- **Overdraw**: Additive blending is fill-rate intensive; keep particle size and count moderate
- **Sorting cost**: Alpha systems incur compute overhead for block sorting (~512 blocks × 256 particles)
- **Texture bandwidth**: Flipbook textures should be compressed (KTX2) and atlased (16×4 common)
- **Soft particles**: G-buffer read adds bandwidth; disable if depth fading isn't visible
## Common Presets
### Fire
```cpp
sys.blendMode = Additive;
sys.params.color = glm::vec4(1.0f, 0.5f, 0.1f, 1.0f); // Orange
sys.params.gravity = 0.0f;
sys.params.minSpeed = 1.0f; sys.params.maxSpeed = 3.0f;
sys.params.drag = 0.5f;
sys.flipbookTexture = "vfx/flame.ktx2";
```
### Smoke
```cpp
sys.blendMode = Alpha;
sys.params.color = glm::vec4(0.3f, 0.3f, 0.3f, 0.5f); // Gray, semi-transparent
sys.params.gravity = -2.0f; // Rise upward (negative gravity)
sys.params.drag = 1.5f; // Slow down quickly
sys.params.minSpeed = 0.5f; sys.params.maxSpeed = 2.0f;
sys.noiseTexture = "vfx/simplex.ktx2";
sys.params.noiseStrength = 0.2f; // Strong distortion
```
### Sparks
```cpp
sys.blendMode = Additive;
sys.params.color = glm::vec4(1.0f, 0.8f, 0.2f, 1.0f); // Bright yellow
sys.params.gravity = 9.8f; // Fall downward
sys.params.drag = 0.1f;
sys.params.minSpeed = 5.0f; sys.params.maxSpeed = 15.0f;
sys.params.minSize = 0.01f; sys.params.maxSize = 0.03f; // Small
sys.flipbookTexture = ""; // Disable flipbook (procedural sprite)
```
## Troubleshooting
**Particles not visible**:
- Ensure `enabled = true` and `particleCount > 0`
- Check `color.a > 0` (fully transparent particles are invisible)
- Verify system is allocated: `api.get_particle_systems()` should list the ID
**Particles flickering or popping**:
- Set `reset = false` after first frame (reset respawns all particles immediately)
- Increase `minLife`/`maxLife` to prevent frequent respawning
**Performance issues**:
- Reduce total particle count (check `allocated_particles()`)
- Use additive blend for most systems (cheaper than alpha)
- Reduce flipbook texture resolution or mip levels
**Textures missing**:
- Call `preload_vfx_texture("vfx/texture.ktx2")` before first frame
- Or call `preload_needed_textures()` in engine setup
- Check AssetManager can resolve path: `assetPath("vfx/texture.ktx2")`
## API Reference
### ParticlePass
```cpp
class ParticlePass : public IRenderPass
{
// System management
uint32_t create_system(uint32_t count);
bool destroy_system(uint32_t id);
bool resize_system(uint32_t id, uint32_t new_count);
std::vector<System>& systems();
const std::vector<System>& systems() const;
// Pool stats
uint32_t allocated_particles() const;
uint32_t free_particles() const;
// Texture preloading
void preload_vfx_texture(const std::string& assetName);
void preload_needed_textures();
};
```
### GameAPI::Engine Particle Methods
```cpp
// System creation/destruction
uint32_t create_particle_system(uint32_t particle_count);
bool destroy_particle_system(uint32_t system_id);
// System control
void set_particle_system(uint32_t system_id, const ParticleSystem& sys);
ParticleSystem get_particle_system(uint32_t system_id) const;
std::vector<ParticleSystem> get_particle_systems() const;
// Pool stats
uint32_t get_particle_pool_allocated() const;
uint32_t get_particle_pool_free() const;
// Texture preloading
void preload_particle_texture(const std::string& asset_path);
```
## See Also
- `docs/RenderGraph.md` — Render graph integration details
- `docs/RenderPasses.md` — Pass execution and pipeline management
- `docs/GameAPI.md` — High-level game API
- `docs/TextureLoading.md` — Asset loading and streaming

189
docs/README.md Normal file
View File

@@ -0,0 +1,189 @@
# Vulkan Engine Documentation
Welcome to the Vulkan Engine documentation. This engine is a modern, high-performance rendering engine built with Vulkan, featuring a deferred PBR pipeline, GPU-driven systems, and comprehensive tooling for game development.
## Quick Start
- **[BUILD.md](BUILD.md)** — Build instructions, dependencies, and platform-specific setup
- **[RUNTIME.md](RUNTIME.md)** — Runtime architecture and execution flow
- **[TROUBLESHOOTING.md](TROUBLESHOOTING.md)** — Common issues and solutions
## Core Architecture
### Engine Foundation
- **[EngineContext.md](EngineContext.md)** — Central dependency injection container and per-frame state
- **[FrameResources.md](FrameResources.md)** — Frame-in-flight synchronization and resource management
- **[ResourceManager.md](ResourceManager.md)** — VMA-based GPU memory allocation and resource lifecycle
- **[FloatingOrigin.md](FloatingOrigin.md)** — Large-world support with double-precision coordinates
### Rendering
- **[RenderGraph.md](RenderGraph.md)** — DAG-based render pass scheduling with automatic barriers
- **[RenderPasses.md](RenderPasses.md)** — Built-in passes: geometry, lighting, SSR, volumetrics, particles, tonemap, FXAA
- **[PipelineManager.md](PipelineManager.md)** — Graphics/compute pipeline creation and hot-reloading
- **[Descriptors.md](Descriptors.md)** — Descriptor set management and binding strategies
- **[SHADERS.md](SHADERS.md)** — Shader compilation, includes, and conventions
### Advanced Rendering Features
- **[MultiLighting.md](MultiLighting.md)** — Deferred lighting with point/spot lights and IBL
- **[IBL.md](IBL.md)** — Image-based lighting and local reflection probes
- **[RayTracing.md](RayTracing.md)** — Ray-traced shadows and reflections with hybrid modes
- **[ParticleSystem.md](ParticleSystem.md)** — GPU particle simulation (128K particles, flipbook, soft particles)
- **[Volumetrics.md](Volumetrics.md)** — Voxel-based clouds, smoke, and flame with raymarching
- **[materials.md](materials.md)** — PBR material system and texture bindings
### Scene Management
- **[Scene.md](Scene.md)** — Scene graph, node hierarchy, and draw context
- **[ASSETS.md](ASSETS.md)** — Asset management overview
- **[asset_manager.md](asset_manager.md)** — AssetManager API and async loading
- **[TextureLoading.md](TextureLoading.md)** — Texture streaming, VRAM budgeting, and KTX2 support
- **[Picking.md](Picking.md)** — BVH-based object picking and selection
### UI and Input
- **[ImGuiSystem.md](ImGuiSystem.md)** — ImGui integration and debug UI
- **[InputSystem.md](InputSystem.md)** — Keyboard, mouse, and cursor handling
### Compute and Effects
- **[Compute.md](Compute.md)** — Compute pipeline creation and dispatch
### Game Development API
- **[GameAPI.md](GameAPI.md)** — High-level game-facing API (textures, lighting, picking, particles, volumetrics)
- **[debug_draw_api_examples.md](debug_draw_api_examples.md)** — Debug drawing examples (lines, spheres, AABBs, etc.)
## Documentation Organization
### By System
**Core Systems:**
- Engine: [EngineContext.md](EngineContext.md), [FrameResources.md](FrameResources.md), [ResourceManager.md](ResourceManager.md)
- Rendering: [RenderGraph.md](RenderGraph.md), [RenderPasses.md](RenderPasses.md), [PipelineManager.md](PipelineManager.md)
- Scene: [Scene.md](Scene.md), [asset_manager.md](asset_manager.md), [TextureLoading.md](TextureLoading.md)
**Rendering Features:**
- Lighting: [MultiLighting.md](MultiLighting.md), [IBL.md](IBL.md)
- Effects: [ParticleSystem.md](ParticleSystem.md), [Volumetrics.md](Volumetrics.md)
- Post-processing: [RenderPasses.md](RenderPasses.md) (SSR, Tonemap, FXAA sections)
- Ray Tracing: [RayTracing.md](RayTracing.md)
**Developer Tools:**
- Debugging: [debug_draw_api_examples.md](debug_draw_api_examples.md), [ImGuiSystem.md](ImGuiSystem.md)
- Input: [InputSystem.md](InputSystem.md), [Picking.md](Picking.md)
### By Task
**Setting up the engine:**
1. [BUILD.md](BUILD.md) — Build and dependencies
2. [RUNTIME.md](RUNTIME.md) — Understanding the runtime loop
3. [EngineContext.md](EngineContext.md) — Core architecture
4. [GameAPI.md](GameAPI.md) — High-level API
**Creating content:**
1. [ASSETS.md](ASSETS.md) — Asset pipeline overview
2. [TextureLoading.md](TextureLoading.md) — Loading textures
3. [Scene.md](Scene.md) — Adding objects to the scene
4. [materials.md](materials.md) — Material setup
**Adding effects:**
1. [MultiLighting.md](MultiLighting.md) — Point/spot lights
2. [ParticleSystem.md](ParticleSystem.md) — Particles (fire, smoke, sparks)
3. [Volumetrics.md](Volumetrics.md) — Clouds and atmospheric effects
4. [IBL.md](IBL.md) — Environment lighting
**Debugging and visualization:**
1. [debug_draw_api_examples.md](debug_draw_api_examples.md) — Debug primitives
2. [ImGuiSystem.md](ImGuiSystem.md) — Debug UI
3. [Picking.md](Picking.md) — Object selection
**Optimizing performance:**
1. [TextureLoading.md](TextureLoading.md) — VRAM budgeting
2. [RenderGraph.md](RenderGraph.md) — Render pass optimization
3. [FrameResources.md](FrameResources.md) — Frame synchronization
**Writing shaders:**
1. [SHADERS.md](SHADERS.md) — Shader conventions
2. [Descriptors.md](Descriptors.md) — Descriptor bindings
3. [RenderPasses.md](RenderPasses.md) — Custom passes
**Advanced topics:**
1. [RayTracing.md](RayTracing.md) — Hardware ray tracing
2. [FloatingOrigin.md](FloatingOrigin.md) — Large worlds
3. [Compute.md](Compute.md) — GPU compute
## Rendering Pipeline Overview
The engine uses a deferred PBR pipeline with the following stages:
1. **Background** — Sky/gradient generation (compute)
2. **Geometry** — G-Buffer pass (position, normal, albedo, AO/emissive)
3. **Shadows** — Cascaded shadow maps (4 cascades, optional RT)
4. **Lighting** — Deferred PBR lighting (point/spot/directional, IBL)
5. **SSR** — Screen-space reflections (optional RT fallback)
6. **Volumetrics** — Voxel clouds/smoke/flame (up to 4 volumes)
7. **Particles** — GPU particle systems (up to 128K particles)
8. **Tonemap + Bloom** — HDR → LDR conversion
9. **FXAA** — Anti-aliasing
10. **Transparent** — Forward rendering for transparent objects
11. **DebugDraw** — Debug visualization
12. **ImGui** — UI overlay
13. **Present** — Swapchain presentation
See [RenderPasses.md](RenderPasses.md) for details.
## Key Features
- **Modern Vulkan API** — Dynamic rendering, synchronization2, ray query
- **Deferred PBR Pipeline** — Physically-based materials with IBL
- **GPU-Driven Systems** — Particles and volumetrics fully GPU-simulated
- **Render Graph** — Automatic barrier insertion and resource management
- **Ray Tracing** — Hybrid shadows and reflections (optional)
- **Texture Streaming** — VRAM budgeting with LRU eviction
- **Floating-Origin** — Double-precision world coordinates for large worlds
- **Hot-Reload** — Shader recompilation without restart
- **Debug Tools** — Immediate-mode debug drawing and ImGui integration
## Architecture Highlights
### Rendering
- **Render Graph** ([RenderGraph.md](RenderGraph.md)): DAG-based execution with automatic resource transitions
- **Pipeline Manager** ([PipelineManager.md](PipelineManager.md)): Hot-reloadable shaders and compute pipelines
- **Multi-Lighting** ([MultiLighting.md](MultiLighting.md)): Clustered forward+ deferred hybrid
### GPU-Driven Effects
- **Particles** ([ParticleSystem.md](ParticleSystem.md)): 128K particle global pool, compute-based simulation, block-level depth sorting
- **Volumetrics** ([Volumetrics.md](Volumetrics.md)): Semi-Lagrangian advection, procedural noise injection, raymarch composite
### Asset Pipeline
- **Async Loading** ([asset_manager.md](asset_manager.md)): Background thread pool with priority queuing
- **Texture Streaming** ([TextureLoading.md](TextureLoading.md)): Automatic VRAM management with upload budgeting
- **KTX2 Support**: Compressed texture formats (BC7, ASTC) with mipmaps
### Developer Experience
- **GameAPI** ([GameAPI.md](GameAPI.md)): Stable, high-level C++ API abstracting Vulkan details
- **Debug Drawing** ([debug_draw_api_examples.md](debug_draw_api_examples.md)): Immediate-mode primitives with depth testing
- **ImGui Integration** ([ImGuiSystem.md](ImGuiSystem.md)): Full engine UI with live parameter editing
## Contributing
When adding new features:
1. Update relevant documentation in `docs/`
2. Add examples to [GameAPI.md](GameAPI.md) if exposing new API
3. Include shader documentation in [SHADERS.md](SHADERS.md) for new shaders
## Getting Help
- **Build issues**: [BUILD.md](BUILD.md), [TROUBLESHOOTING.md](TROUBLESHOOTING.md)
- **Runtime errors**: [RUNTIME.md](RUNTIME.md), [EngineContext.md](EngineContext.md)
- **Performance**: [TextureLoading.md](TextureLoading.md), [RenderGraph.md](RenderGraph.md)
- **Usage questions**: [GameAPI.md](GameAPI.md), [debug_draw_api_examples.md](debug_draw_api_examples.md)

487
docs/Volumetrics.md Normal file
View File

@@ -0,0 +1,487 @@
# Volumetric Cloud System
The volumetric system provides GPU-accelerated voxel-based rendering for clouds, smoke, and flame effects using raymarching and procedural density simulation.
## Architecture Overview
The system is implemented across multiple components:
- **CloudPass** (`src/render/passes/clouds.h/.cpp`) — Render pass managing voxel volumes, compute simulation, and raymarching
- **GameAPI** (`src/core/game_api.h`) — High-level API for configuring volumetric effects
- **Shaders**
- `shaders/cloud_voxel_advect.comp` — Voxel density simulation (advection + injection)
- `shaders/clouds.frag` — Raymarching fragment shader
## Key Features
- **Voxel-based density**: Cubic grids (from 4³ to 256³ voxels) storing per-voxel density values
- **Three volume types**: Clouds (infinite XZ wrap), Smoke (localized), Flame (emissive)
- **GPU simulation**: Semi-Lagrangian advection with procedural noise injection
- **Raymarching composite**: Beer-Lambert absorption + single-scattering approximation
- **Ping-pong buffers**: Double-buffered voxel grids for temporal stability
- **Camera following**: Volumes can anchor to camera XZ (infinite clouds) or drift in world-space
- **Floating-origin stable**: Automatically adjusts volume positions when world origin shifts
- **Multi-volume support**: Up to 4 independent volumes (`MAX_VOXEL_VOLUMES = 4`)
## Volume Types
### Clouds (Type 0)
- **Behavior**: Continuous XZ wrapping for infinite cloud layers
- **Injection**: Broad slab with height-based shaping (upper/lower bounds)
- **Advection**: Wind wraps in XZ, clamped in Y
- **Typical use**: Sky clouds, atmospheric layers
### Smoke (Type 1)
- **Behavior**: Localized emission with soft dissipation
- **Injection**: Spherical emitter in UVW space with softer noise threshold
- **Advection**: Fully clamped (no wrapping)
- **Typical use**: Smoke columns, steam, fog banks
### Flame (Type 2)
- **Behavior**: Flickering emissive source with strong noise
- **Injection**: Spiky procedural noise, blends toward injected field (avoids fog accumulation)
- **Advection**: Fully clamped (no wrapping)
- **Rendering**: Adds emission term (`emissionColor × emissionStrength`)
- **Typical use**: Fires, torches, explosions
## Creating Volumetric Effects
### Via GameAPI
```cpp
#include "core/game_api.h"
GameAPI::Engine api(&engine);
// Enable volumetrics globally
api.set_volumetrics_enabled(true);
// Configure a cloud volume (index 0)
GameAPI::VoxelVolumeSettings cloud;
cloud.enabled = true;
cloud.type = GameAPI::VoxelVolumeType::Clouds;
// Position: follow camera XZ, offset in Y
cloud.followCameraXZ = true;
cloud.volumeCenterLocal = glm::vec3(0.0f, 50.0f, 0.0f); // Y = 50 units in local space (only XZ tracks the camera)
cloud.volumeHalfExtents = glm::vec3(100.0f, 20.0f, 100.0f); // 200×40×200 box
// Animation: enable voxel advection
cloud.animateVoxels = true;
cloud.windVelocityLocal = glm::vec3(5.0f, 2.0f, 0.0f); // Drift +X, rise +Y
cloud.dissipation = 0.5f; // Slow decay
cloud.noiseStrength = 0.8f;
cloud.noiseScale = 8.0f;
cloud.noiseSpeed = 0.3f;
// Rendering
cloud.densityScale = 1.5f;
cloud.coverage = 0.3f; // Higher = less dense (threshold)
cloud.extinction = 1.0f;
cloud.stepCount = 64; // Raymarch steps (quality vs performance)
cloud.gridResolution = 64; // 64³ voxel grid
// Shading
cloud.albedo = glm::vec3(1.0f, 1.0f, 1.0f); // White clouds
cloud.scatterStrength = 1.2f;
cloud.emissionColor = glm::vec3(0.0f); // No emission
cloud.emissionStrength = 0.0f;
api.set_voxel_volume(0, cloud);
```
### Flame Effect
```cpp
GameAPI::VoxelVolumeSettings flame;
flame.enabled = true;
flame.type = GameAPI::VoxelVolumeType::Flame;
// Position: absolute world location
flame.followCameraXZ = false;
flame.volumeCenterLocal = glm::vec3(0.0f, 1.0f, 0.0f);
flame.volumeHalfExtents = glm::vec3(1.0f, 2.0f, 1.0f); // 2×4×2 box
// Animation
flame.animateVoxels = true;
flame.windVelocityLocal = glm::vec3(0.0f, 8.0f, 0.0f); // Rise upward
flame.dissipation = 2.0f; // Fast decay
flame.noiseStrength = 1.5f;
flame.noiseScale = 10.0f;
flame.noiseSpeed = 2.0f;
// Emitter in UVW space (bottom center)
flame.emitterUVW = glm::vec3(0.5f, 0.05f, 0.5f);
flame.emitterRadius = 0.2f; // 20% of volume size
// Shading
flame.densityScale = 2.0f;
flame.coverage = 0.0f;
flame.extinction = 0.8f;
flame.stepCount = 48;
flame.gridResolution = 48;
flame.albedo = glm::vec3(1.0f, 0.6f, 0.2f); // Orange scatter
flame.scatterStrength = 0.5f;
flame.emissionColor = glm::vec3(1.0f, 0.5f, 0.1f); // Orange-red glow
flame.emissionStrength = 3.0f; // Strong emission
api.set_voxel_volume(1, flame);
```
## Simulation Details
### Voxel Advection (Compute Shader)
The `cloud_voxel_advect.comp` shader updates voxel density each frame:
1. **Semi-Lagrangian advection**: Backtrace along wind velocity
```glsl
vec3 back = uvw - (windVelocityLocal / volumeSize) * dt;
```
- Clouds: Wrap XZ (`fract(back.xz)`), clamp Y
- Smoke/Flame: Clamp all axes
2. **Trilinear sampling**: Sample input density at backtraced position
```glsl
float advected = sample_density_trilinear(back, gridResolution);
```
3. **Dissipation**: Exponential decay
```glsl
advected *= exp(-dissipation * dt);
```
4. **Noise injection**: Procedural density injection using 4-octave FBM
- **Clouds**: Broad slab with height shaping
```glsl
injected = smoothstep(0.55, 0.80, fbm3(uvw * noiseScale + time * noiseSpeed));
low = smoothstep(0.0, 0.18, uvw.y);
high = 1.0 - smoothstep(0.78, 1.0, uvw.y);
injected *= low * high;
```
- **Smoke**: Spherical emitter with softer threshold
```glsl
shape = 1.0 - smoothstep(emitterRadius, emitterRadius * 1.25, distance(uvw, emitterUVW));
injected = smoothstep(0.45, 0.75, fbm3(...)) * shape;
```
- **Flame**: Spiky noise with flickering
```glsl
injected = pow(fbm3(...), 2.0) * shape;
out_density = mix(advected, injected, noiseStrength * dt); // Blend toward injected
```
5. **Write output**: Write to ping-pong buffer
```glsl
vox_out.density[idx3(c, gridResolution)] = clamp(out_density, 0.0, 1.0);
```
### Raymarching (Fragment Shader)
The `clouds.frag` shader composites volumes onto the HDR buffer:
1. **Ray setup**:
- Reconstruct world-space ray from screen UV
- Define AABB from `volumeCenterLocal ± volumeHalfExtents`
- Compute ray-AABB intersection (`t0`, `t1`)
2. **Geometry clipping**:
- Sample G-buffer position (`posTex`)
- If opaque geometry exists, clamp `t1` to surface distance
- Prevents clouds rendering behind solid objects
3. **Raymarching loop**:
```glsl
float transmittance = 1.0;
vec3 scattering = vec3(0.0);
for (int i = 0; i < stepCount; ++i) {
vec3 p = camPos + rd * t;
float density = sample_voxel_density(p, bmin, bmax);
// Apply coverage threshold
density = max(density - coverage, 0.0) * densityScale;
// Beer-Lambert absorption
float extinction_coeff = density * extinction;
float step_transmittance = exp(-extinction_coeff * dt);
// In-scattering (single-scattering approximation)
vec3 light_contrib = albedo * scatterStrength * density;
// Flame emission
if (volumeType == 2) {
light_contrib += emissionColor * emissionStrength * density;
}
scattering += transmittance * (1.0 - step_transmittance) * light_contrib;
transmittance *= step_transmittance;
t += dt;
}
```
4. **Composite**:
```glsl
vec3 finalColor = baseColor * transmittance + scattering;
outColor = vec4(finalColor, 1.0);
```
### Floating-Origin Stability
When the world origin shifts (`CloudPass::update_time_and_origin_delta()`):
- Volumes with `followCameraXZ = false` are adjusted: `volumeCenterLocal -= origin_delta`
- Ensures volumes stay in the same world-space location despite coordinate changes
### Volume Drift
For non-camera-following volumes:
```cpp
volumeCenterLocal += volumeVelocityLocal * dt;
```
Allows volumes to drift independently (e.g., moving storm clouds).
## Memory Management
### Voxel Buffers
Each volume maintains two ping-pong buffers (`voxelDensity[2]`):
- **Read buffer**: Input to advection compute shader and raymarch fragment shader
- **Write buffer**: Output of advection compute shader
- Buffers swap each frame (`voxelReadIndex = 1 - voxelReadIndex`)
Buffer size: `gridResolution³ × sizeof(float)` bytes
- Example: 64³ grid = 1 MB per buffer (2 MB total per volume)
- Maximum 4 volumes = 8 MB total (at 64³ resolution)
### Lazy Allocation
Voxel buffers are allocated only when:
- `enabled = true`
- `gridResolution` changes
- `rebuild_voxel_density()` is called explicitly
Initial density is procedurally generated using the same FBM noise as injection.
## Render Graph Integration
The cloud pass registers after lighting/SSR:
```cpp
RGImageHandle CloudPass::register_graph(RenderGraph* graph,
RGImageHandle hdrInput,
RGImageHandle gbufPos)
{
// For each enabled volume:
// 1. Optional: Add compute pass for voxel advection (if animateVoxels == true)
// 2. Add graphics pass for raymarching composite
// Passes read/write ping-pong buffers and sample G-buffer depth
// Returns final HDR image with clouds composited
}
```
**Pass structure** (per volume):
1. **VoxelUpdate** (compute, optional): Read voxel buffer → advect → write voxel buffer
2. **Volumetrics** (graphics): Read HDR input + G-buffer + voxel buffer → raymarch → write HDR output
Volumes are rendered sequentially (volume 0 → 1 → 2 → 3) to allow layered effects.
## Performance Considerations
- **Voxel resolution**: Higher resolution = better detail but 8× memory per doubling (64³ = 1 MB, 128³ = 8 MB)
- **Raymarch steps**: More steps = smoother results but linear fragment cost (32-128 typical)
- **Fill rate**: Volumetrics are fragment-shader intensive; reduce `stepCount` on low-end hardware
- **Advection cost**: Compute cost is `O(resolution³)` but typically <1ms for 64³
- **Multi-volume overhead**: Each active volume adds a full raymarch pass; budget 2-3 volumes max
### Recommended Settings
**High quality (desktop)**:
```cpp
gridResolution = 128;
stepCount = 128;
```
**Medium quality (mid-range)**:
```cpp
gridResolution = 64;
stepCount = 64;
```
**Low quality (mobile/low-end)**:
```cpp
gridResolution = 32;
stepCount = 32;
```
## Parameter Reference
### VoxelVolumeSettings
```cpp
struct VoxelVolumeSettings
{
// Enable/type
bool enabled{false};
VoxelVolumeType type{Clouds}; // Clouds, Smoke, Flame
// Positioning
bool followCameraXZ{false}; // Anchor to camera XZ
bool animateVoxels{true}; // Enable voxel simulation
glm::vec3 volumeCenterLocal{0,2,0};
glm::vec3 volumeHalfExtents{8,8,8};
glm::vec3 volumeVelocityLocal{0}; // Drift velocity (if !followCameraXZ)
// Rendering
float densityScale{1.0}; // Density multiplier
float coverage{0.0}; // 0..1 threshold (higher = less dense)
float extinction{1.0}; // Absorption coefficient
int stepCount{48}; // Raymarch steps (8-256)
uint32_t gridResolution{48}; // Voxel grid resolution (4-256)
// Simulation (advection)
glm::vec3 windVelocityLocal{0,2,0}; // Wind velocity (units/sec)
float dissipation{1.25}; // Density decay (1/sec)
float noiseStrength{1.0}; // Injection rate
float noiseScale{8.0}; // Noise frequency
float noiseSpeed{1.0}; // Time scale
// Emitter (smoke/flame only)
glm::vec3 emitterUVW{0.5,0.05,0.5}; // Normalized (0..1)
float emitterRadius{0.18}; // Normalized (0..1)
// Shading
glm::vec3 albedo{1,1,1}; // Scattering tint
float scatterStrength{1.0};
glm::vec3 emissionColor{1,0.6,0.25};// Flame emission tint
float emissionStrength{0.0}; // Flame emission strength
};
```
## Common Presets
### Stratocumulus Clouds
```cpp
cloud.type = Clouds;
cloud.followCameraXZ = true;
cloud.volumeCenterLocal = glm::vec3(0, 80, 0);
cloud.volumeHalfExtents = glm::vec3(200, 30, 200);
cloud.windVelocityLocal = glm::vec3(3, 1, 0);
cloud.dissipation = 0.3f;
cloud.densityScale = 1.2f;
cloud.coverage = 0.4f;
cloud.gridResolution = 64;
cloud.stepCount = 64;
```
### Torch Flame
```cpp
flame.type = Flame;
flame.followCameraXZ = false;
flame.volumeCenterLocal = glm::vec3(0, 1.5, 0);
flame.volumeHalfExtents = glm::vec3(0.3, 0.8, 0.3);
flame.windVelocityLocal = glm::vec3(0, 6, 0);
flame.dissipation = 2.5f;
flame.noiseStrength = 2.0f;
flame.emitterUVW = glm::vec3(0.5, 0.1, 0.5);
flame.emitterRadius = 0.25f;
flame.emissionColor = glm::vec3(1.0, 0.4, 0.1);
flame.emissionStrength = 4.0f;
flame.gridResolution = 32;
flame.stepCount = 32;
```
### Smoke Plume
```cpp
smoke.type = Smoke;
smoke.followCameraXZ = false;
smoke.volumeCenterLocal = glm::vec3(0, 2, 0);
smoke.volumeHalfExtents = glm::vec3(2, 5, 2);
smoke.windVelocityLocal = glm::vec3(1, 4, 0);
smoke.dissipation = 1.0f;
smoke.noiseStrength = 1.2f;
smoke.emitterUVW = glm::vec3(0.5, 0.05, 0.5);
smoke.emitterRadius = 0.15f;
smoke.albedo = glm::vec3(0.4, 0.4, 0.4);
smoke.scatterStrength = 0.8f;
smoke.gridResolution = 48;
smoke.stepCount = 48;
```
## Troubleshooting
**Volumes not visible**:
- Ensure the volume has `enabled = true` and that `volumetrics_enabled = true` globally (see `set_volumetrics_enabled`)
- Check that the volume's AABB intersects the camera frustum
- Reduce `coverage` (lower = denser)
- Increase `densityScale`

**Blocky/noisy appearance**:
- Increase `gridResolution` (64 → 128)
- Increase `stepCount` (48 → 96)
- Adjust `noiseScale` for finer detail

**Performance issues**:
- Reduce `gridResolution` (64 → 32)
- Reduce `stepCount` (64 → 32)
- Disable `animateVoxels` for static volumes
- Reduce number of active volumes

**Volumes don't animate**:
- Ensure `animateVoxels = true`
- Check `windVelocityLocal` is non-zero
- Verify `noiseStrength > 0` and `noiseSpeed > 0`

**Volumes flicker/pop**:
- Increase `dissipation` to smooth density changes
- Lower `noiseStrength` for subtler injection
- Use a higher `gridResolution` for temporal stability
## API Reference
### GameAPI::Engine Volumetric Methods
```cpp
// Global enable/disable
void set_volumetrics_enabled(bool enabled);
bool get_volumetrics_enabled() const;
// Volume configuration (index 0-3)
void set_voxel_volume(int index, const VoxelVolumeSettings& settings);
VoxelVolumeSettings get_voxel_volume(int index) const;
// Retrieve all volumes
std::vector<VoxelVolumeSettings> get_voxel_volumes() const;
```
### CloudPass
```cpp
class CloudPass : public IRenderPass
{
// Render graph registration
RGImageHandle register_graph(RenderGraph* graph,
RGImageHandle hdrInput,
RGImageHandle gbufPos);
// Internal voxel management
void rebuild_voxel_density(uint32_t volume_index,
uint32_t resolution,
const VoxelVolumeSettings& settings);
};
```
## See Also
- `docs/ParticleSystem.md` — GPU particle system documentation
- `docs/RenderGraph.md` — Render graph integration details
- `docs/RenderPasses.md` — Pass execution and pipeline management
- `docs/GameAPI.md` — High-level game API
- `docs/Compute.md` — Compute pipeline details

View File

@@ -10,10 +10,11 @@ void main()
{ {
// Reconstruct world-space direction from screen UV // Reconstruct world-space direction from screen UV
vec2 ndc = inUV * 2.0 - 1.0; // [-1,1] vec2 ndc = inUV * 2.0 - 1.0; // [-1,1]
vec4 clip = vec4(ndc, 1.0, 1.0);
vec4 vpos = inverse(sceneData.proj) * clip; // Avoid per-pixel matrix inverses. With a perspective projection, a view-space ray can be
vec3 viewDir = normalize(vpos.xyz / max(vpos.w, 1e-6)); // reconstructed directly from the projection diagonal and then rotated to world space.
vec3 worldDir = normalize((inverse(sceneData.view) * vec4(viewDir, 0.0)).xyz); vec3 viewDir = normalize(vec3(ndc.x / sceneData.proj[0][0], ndc.y / sceneData.proj[1][1], -1.0));
vec3 worldDir = normalize(transpose(mat3(sceneData.view)) * viewDir);
vec2 uv = dir_to_equirect(worldDir); vec2 uv = dir_to_equirect(worldDir);
// Sample a dedicated background environment map when available. // Sample a dedicated background environment map when available.

View File

@@ -35,6 +35,17 @@ const float SHADOW_MIN_BIAS = 1e-5;
const float SHADOW_RAY_TMIN = 0.02;// start a bit away from the surface const float SHADOW_RAY_TMIN = 0.02;// start a bit away from the surface
const float SHADOW_RAY_ORIGIN_BIAS = 0.01;// world units const float SHADOW_RAY_ORIGIN_BIAS = 0.01;// world units
vec3 getCameraWorldPosition()
{
    // Recover the camera's world-space position C from the view matrix without
    // a full matrix inverse. Assumes the view matrix has the form
    //   view = [ R^T  -R^T*C ]
    //          [ 0       1   ]
    // (rotation + translation only), so its translation column is t = -R^T * C
    // and therefore C = -R * t with R = transpose(mat3(view)).
    vec3 viewTranslation = sceneData.view[3].xyz;
    mat3 cameraRotation = transpose(mat3(sceneData.view));
    return -cameraRotation * viewTranslation;
}
float hash12(vec2 p) float hash12(vec2 p)
{ {
vec3 p3 = fract(vec3(p.xyx) * 0.1031); vec3 p3 = fract(vec3(p.xyx) * 0.1031);
@@ -290,7 +301,7 @@ void main(){
float ao = extraSample.x; float ao = extraSample.x;
vec3 emissive = extraSample.yzw; vec3 emissive = extraSample.yzw;
vec3 camPos = vec3(inverse(sceneData.view)[3]); vec3 camPos = getCameraWorldPosition();
vec3 V = normalize(camPos - pos); vec3 V = normalize(camPos - pos);
// Directional sun term using evaluate_brdf + cascaded shadowing // Directional sun term using evaluate_brdf + cascaded shadowing

View File

@@ -27,6 +27,17 @@ const float SHADOW_RPDB_SCALE = 1.0;
// Minimum clamp to keep a tiny bias even on perpendicular receivers // Minimum clamp to keep a tiny bias even on perpendicular receivers
const float SHADOW_MIN_BIAS = 1e-5; const float SHADOW_MIN_BIAS = 1e-5;
vec3 getCameraWorldPosition()
{
    // Recover the camera's world-space position C from the view matrix without
    // a full matrix inverse. Assumes the view matrix has the form
    //   view = [ R^T  -R^T*C ]
    //          [ 0       1   ]
    // (rotation + translation only), so its translation column is t = -R^T * C
    // and therefore C = -R * t with R = transpose(mat3(view)).
    vec3 viewTranslation = sceneData.view[3].xyz;
    mat3 cameraRotation = transpose(mat3(sceneData.view));
    return -cameraRotation * viewTranslation;
}
float hash12(vec2 p) float hash12(vec2 p)
{ {
vec3 p3 = fract(vec3(p.xyx) * 0.1031); vec3 p3 = fract(vec3(p.xyx) * 0.1031);
@@ -219,7 +230,7 @@ void main(){
float ao = extraSample.x; float ao = extraSample.x;
vec3 emissive = extraSample.yzw; vec3 emissive = extraSample.yzw;
vec3 camPos = vec3(inverse(sceneData.view)[3]); vec3 camPos = getCameraWorldPosition();
vec3 V = normalize(camPos - pos); vec3 V = normalize(camPos - pos);
// Directional sun term using evaluate_brdf + cascaded shadowing // Directional sun term using evaluate_brdf + cascaded shadowing

View File

@@ -30,6 +30,7 @@ layout(buffer_reference, std430) readonly buffer VertexBuffer{
layout(push_constant) uniform constants layout(push_constant) uniform constants
{ {
mat4 render_matrix; mat4 render_matrix;
mat3 normal_matrix;
VertexBuffer vertexBuffer; VertexBuffer vertexBuffer;
uint objectID; uint objectID;
} PushConstants; } PushConstants;

View File

@@ -13,6 +13,17 @@ layout (location = 4) in vec4 inTangent;
layout (location = 0) out vec4 outFragColor; layout (location = 0) out vec4 outFragColor;
vec3 getCameraWorldPosition()
{
    // Recover the camera's world-space position C from the view matrix without
    // a full matrix inverse. Assumes the view matrix has the form
    //   view = [ R^T  -R^T*C ]
    //          [ 0       1   ]
    // (rotation + translation only), so its translation column is t = -R^T * C
    // and therefore C = -R * t with R = transpose(mat3(view)).
    vec3 viewTranslation = sceneData.view[3].xyz;
    mat3 cameraRotation = transpose(mat3(sceneData.view));
    return -cameraRotation * viewTranslation;
}
void main() void main()
{ {
// Base color with material factor and texture // Base color with material factor and texture
@@ -43,7 +54,7 @@ void main()
vec3 T = normalize(inTangent.xyz); vec3 T = normalize(inTangent.xyz);
vec3 B = normalize(cross(Nn, T)) * inTangent.w; vec3 B = normalize(cross(Nn, T)) * inTangent.w;
vec3 N = normalize(T * Nm.x + B * Nm.y + Nn * Nm.z); vec3 N = normalize(T * Nm.x + B * Nm.y + Nn * Nm.z);
vec3 camPos = vec3(inverse(sceneData.view)[3]); vec3 camPos = getCameraWorldPosition();
vec3 V = normalize(camPos - inWorldPos); vec3 V = normalize(camPos - inWorldPos);
// Directional sun term (no shadows in forward path) // Directional sun term (no shadows in forward path)

View File

@@ -29,6 +29,7 @@ layout(buffer_reference, std430) readonly buffer VertexBuffer{
layout(push_constant) uniform constants layout(push_constant) uniform constants
{ {
mat4 render_matrix; mat4 render_matrix;
mat3 normal_matrix;
VertexBuffer vertexBuffer; VertexBuffer vertexBuffer;
uint objectID; uint objectID;
} PushConstants; } PushConstants;
@@ -37,8 +38,7 @@ void main()
{ {
Vertex v = PushConstants.vertexBuffer.vertices[gl_VertexIndex]; Vertex v = PushConstants.vertexBuffer.vertices[gl_VertexIndex];
mat3 M = mat3(PushConstants.render_matrix); mat3 normalMatrix = PushConstants.normal_matrix;
mat3 normalMatrix = transpose(inverse(M));
vec4 worldPos = PushConstants.render_matrix * vec4(v.position, 1.0); vec4 worldPos = PushConstants.render_matrix * vec4(v.position, 1.0);
gl_Position = sceneData.viewproj * worldPos; gl_Position = sceneData.viewproj * worldPos;
@@ -52,4 +52,3 @@ void main()
outUV = vec2(v.uv_x, v.uv_y); outUV = vec2(v.uv_x, v.uv_y);
outWorldPos = worldPos.xyz; outWorldPos = worldPos.xyz;
} }

View File

@@ -20,9 +20,9 @@ vec3 getCameraWorldPosition()
return -rot * T; // C = -R * T return -rot * T; // C = -R * T
} }
vec3 projectToScreen(vec3 worldPos) vec3 projectToScreenFromView(vec3 viewPos)
{ {
vec4 clip = sceneData.viewproj * vec4(worldPos, 1.0); vec4 clip = sceneData.proj * vec4(viewPos, 1.0);
if (clip.w <= 0.0) if (clip.w <= 0.0)
return vec3(0.0, 0.0, -1.0); return vec3(0.0, 0.0, -1.0);
@@ -64,6 +64,8 @@ void main()
vec3 camPos = getCameraWorldPosition(); vec3 camPos = getCameraWorldPosition();
vec3 V = normalize(camPos - worldPos); vec3 V = normalize(camPos - worldPos);
vec3 R = reflect(-V, N); vec3 R = reflect(-V, N);
vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz;
vec3 viewDir = normalize((sceneData.view * vec4(R, 0.0)).xyz);
float gloss = 1.0 - roughness; float gloss = 1.0 - roughness;
float F0 = mix(0.04, 1.0, metallic); float F0 = mix(0.04, 1.0, metallic);
@@ -87,9 +89,9 @@ void main()
float t = STEP_LENGTH; float t = STEP_LENGTH;
for (int i = 0; i < maxSteps && t <= MAX_DISTANCE; ++i, t += STEP_LENGTH) for (int i = 0; i < maxSteps && t <= MAX_DISTANCE; ++i, t += STEP_LENGTH)
{ {
vec3 samplePos = worldPos + R * t; vec3 sampleViewPos = viewPos + viewDir * t;
vec3 proj = projectToScreen(samplePos); vec3 proj = projectToScreenFromView(sampleViewPos);
if (proj.z < 0.0) if (proj.z < 0.0)
{ {
break; break;
@@ -102,10 +104,9 @@ void main()
continue; continue;
} }
vec3 viewSample = (sceneData.view * vec4(samplePos, 1.0)).xyz;
vec3 viewScene = (sceneData.view * vec4(scenePosSample.xyz, 1.0)).xyz; vec3 viewScene = (sceneData.view * vec4(scenePosSample.xyz, 1.0)).xyz;
float depthRay = -viewSample.z; float depthRay = -sampleViewPos.z;
float depthScene = -viewScene.z; float depthScene = -viewScene.z;
float depthDiff = depthRay - depthScene; float depthDiff = depthRay - depthScene;

View File

@@ -25,9 +25,9 @@ vec3 getCameraWorldPosition()
return -rot * T; // C = -R * T return -rot * T; // C = -R * T
} }
vec3 projectToScreen(vec3 worldPos) vec3 projectToScreenFromView(vec3 viewPos)
{ {
vec4 clip = sceneData.viewproj * vec4(worldPos, 1.0); vec4 clip = sceneData.proj * vec4(viewPos, 1.0);
if (clip.w <= 0.0) if (clip.w <= 0.0)
{ {
@@ -71,6 +71,8 @@ void main()
vec3 camPos = getCameraWorldPosition(); vec3 camPos = getCameraWorldPosition();
vec3 V = normalize(camPos - worldPos); vec3 V = normalize(camPos - worldPos);
vec3 R = reflect(-V, N); vec3 R = reflect(-V, N);
vec3 viewPos = (sceneData.view * vec4(worldPos, 1.0)).xyz;
vec3 viewDir = normalize((sceneData.view * vec4(R, 0.0)).xyz);
float gloss = 1.0 - roughness; float gloss = 1.0 - roughness;
float F0 = mix(0.04, 1.0, metallic); float F0 = mix(0.04, 1.0, metallic);
@@ -107,9 +109,9 @@ void main()
float t = STEP_LENGTH_SSR; float t = STEP_LENGTH_SSR;
for (int i = 0; i < maxSteps && t <= MAX_DISTANCE_SSR; ++i, t += STEP_LENGTH_SSR) for (int i = 0; i < maxSteps && t <= MAX_DISTANCE_SSR; ++i, t += STEP_LENGTH_SSR)
{ {
vec3 samplePos = worldPos + R * t; vec3 sampleViewPos = viewPos + viewDir * t;
vec3 proj = projectToScreen(samplePos); vec3 proj = projectToScreenFromView(sampleViewPos);
if (proj.z < 0.0) if (proj.z < 0.0)
{ {
break; break;
@@ -122,10 +124,9 @@ void main()
continue; continue;
} }
vec3 viewSample = (sceneData.view * vec4(samplePos, 1.0)).xyz;
vec3 viewScene = (sceneData.view * vec4(scenePosSample.xyz, 1.0)).xyz; vec3 viewScene = (sceneData.view * vec4(scenePosSample.xyz, 1.0)).xyz;
float depthRay = -viewSample.z; float depthRay = -sampleViewPos.z;
float depthScene = -viewScene.z; float depthScene = -viewScene.z;
float depthDiff = depthRay - depthScene; float depthDiff = depthRay - depthScene;
@@ -190,7 +191,7 @@ void main()
float tHit = rayQueryGetIntersectionTEXT(rq, true); float tHit = rayQueryGetIntersectionTEXT(rq, true);
vec3 hitPos = origin + R * tHit; vec3 hitPos = origin + R * tHit;
vec3 proj = projectToScreen(hitPos); vec3 proj = projectToScreenFromView((sceneData.view * vec4(hitPos, 1.0)).xyz);
if (proj.z >= 0.0) if (proj.z >= 0.0)
{ {
vec2 hitUV = proj.xy; vec2 hitUV = proj.xy;

View File

@@ -31,37 +31,51 @@ vec3 aces_tonemap(vec3 x)
return clamp((x*(a*x+b))/(x*(c*x+d)+e), 0.0, 1.0); return clamp((x*(a*x+b))/(x*(c*x+d)+e), 0.0, 1.0);
} }
// Accumulates one bloom tap.
//   c             - HDR color of the tap
//   kernel_weight - blur-kernel weight assigned to this tap
//   bloom         - running weighted sum of thresholded color (updated in place)
//   weight_sum    - running normalization weight (updated in place)
void accum_bloom(vec3 c, float kernel_weight, inout vec3 bloom, inout float weight_sum)
{
    // Amount by which the brightest channel exceeds the bloom threshold, floored at zero.
    float excess = max(max(max(c.r, c.g), c.b) - pc.bloomThreshold, 0.0);

    // 1.0 when the tap passes the threshold, 0.0 otherwise. Only passing taps add to the
    // normalization weight, mirroring the previous loop that counted only bright samples.
    float passes = step(1e-5, excess);

    bloom += c * excess * kernel_weight;
    weight_sum += kernel_weight * passes;
}
void main() void main()
{ {
vec3 hdr = texture(uHdr, inUV).rgb; vec3 hdr = texture(uHdr, inUV).rgb;
// Simple bloom in HDR space: gather bright neighbors and add a small blurred contribution. // Simple bloom in HDR space: approximate a 5x5 Gaussian blur using 9 bilinear samples (vs. 25 taps).
if (pc.bloomEnabled != 0) if (pc.bloomEnabled != 0 && pc.bloomIntensity > 0.0)
{ {
vec2 texel = 1.0 / vec2(textureSize(uHdr, 0)); vec2 texel = 1.0 / vec2(textureSize(uHdr, 0));
vec2 d = texel * 1.2; // Combines 1- and 2-texel taps via linear filtering (4:1 weight).
vec3 bloom = vec3(0.0); vec3 bloom = vec3(0.0);
int radius = 2; float wsum = 0.0;
int count = 0;
for (int x = -radius; x <= radius; ++x) // 1D weights [1 4 6 4 1] collapsed to 3 linear samples => weights [5 6 5]
// 2D separable => center 36, axis 30, corners 25 (sum 256).
accum_bloom(hdr, 36.0, bloom, wsum); // reuse center sample
accum_bloom(texture(uHdr, clamp(inUV + vec2( d.x, 0.0), vec2(0.0), vec2(1.0))).rgb, 30.0, bloom, wsum);
accum_bloom(texture(uHdr, clamp(inUV + vec2(-d.x, 0.0), vec2(0.0), vec2(1.0))).rgb, 30.0, bloom, wsum);
accum_bloom(texture(uHdr, clamp(inUV + vec2(0.0, d.y), vec2(0.0), vec2(1.0))).rgb, 30.0, bloom, wsum);
accum_bloom(texture(uHdr, clamp(inUV + vec2(0.0, -d.y), vec2(0.0), vec2(1.0))).rgb, 30.0, bloom, wsum);
accum_bloom(texture(uHdr, clamp(inUV + vec2( d.x, d.y), vec2(0.0), vec2(1.0))).rgb, 25.0, bloom, wsum);
accum_bloom(texture(uHdr, clamp(inUV + vec2(-d.x, d.y), vec2(0.0), vec2(1.0))).rgb, 25.0, bloom, wsum);
accum_bloom(texture(uHdr, clamp(inUV + vec2( d.x, -d.y), vec2(0.0), vec2(1.0))).rgb, 25.0, bloom, wsum);
accum_bloom(texture(uHdr, clamp(inUV + vec2(-d.x, -d.y), vec2(0.0), vec2(1.0))).rgb, 25.0, bloom, wsum);
if (wsum > 0.0)
{ {
for (int y = -radius; y <= radius; ++y) bloom /= wsum;
{
vec2 offset = vec2(x, y) * texel;
vec3 c = texture(uHdr, clamp(inUV + offset, vec2(0.0), vec2(1.0))).rgb;
float bright = max(max(c.r, c.g), c.b) - pc.bloomThreshold;
if (bright > 0.0)
{
bloom += c * bright;
count++;
}
}
}
if (count > 0)
{
bloom /= float(count);
}
hdr += pc.bloomIntensity * bloom; hdr += pc.bloomIntensity * bloom;
} }
}
// Simple exposure // Simple exposure
float exposure = max(pc.exposure, 0.0001); float exposure = max(pc.exposure, 0.0001);
@@ -77,4 +91,3 @@ void main()
outColor = vec4(mapped, 1.0); outColor = vec4(mapped, 1.0);
} }

View File

@@ -28,6 +28,7 @@ inline const char *string_VkFormat(VkFormat) { return "VkFormat"; }
#include <fmt/core.h> #include <fmt/core.h>
#include <glm/mat4x4.hpp> #include <glm/mat4x4.hpp>
#include <glm/mat3x4.hpp>
#include <glm/vec4.hpp> #include <glm/vec4.hpp>
#include <glm/vec3.hpp> #include <glm/vec3.hpp>
#include <glm/gtc/quaternion.hpp> #include <glm/gtc/quaternion.hpp>
@@ -194,9 +195,16 @@ struct GPUMeshBuffers {
// push constants for our mesh object draws // push constants for our mesh object draws
struct GPUDrawPushConstants { struct GPUDrawPushConstants {
glm::mat4 worldMatrix; glm::mat4 worldMatrix;
// std140-compatible representation of mat3 (3 x vec4 columns; w unused).
glm::mat3x4 normalMatrix;
VkDeviceAddress vertexBuffer; VkDeviceAddress vertexBuffer;
uint32_t objectID; uint32_t objectID;
}; };
static_assert(offsetof(GPUDrawPushConstants, worldMatrix) == 0);
static_assert(offsetof(GPUDrawPushConstants, normalMatrix) == 64);
static_assert(offsetof(GPUDrawPushConstants, vertexBuffer) == 112);
static_assert(offsetof(GPUDrawPushConstants, objectID) == 120);
static_assert(sizeof(GPUDrawPushConstants) == 128);
struct DrawContext; struct DrawContext;

View File

@@ -281,6 +281,12 @@ void GeometryPass::draw_geometry(VkCommandBuffer cmd,
} }
GPUDrawPushConstants push_constants{}; GPUDrawPushConstants push_constants{};
push_constants.worldMatrix = r.transform; push_constants.worldMatrix = r.transform;
{
const glm::mat3 n = glm::transpose(glm::inverse(glm::mat3(r.transform)));
push_constants.normalMatrix[0] = glm::vec4(n[0], 0.0f);
push_constants.normalMatrix[1] = glm::vec4(n[1], 0.0f);
push_constants.normalMatrix[2] = glm::vec4(n[2], 0.0f);
}
push_constants.vertexBuffer = r.vertexBufferAddress; push_constants.vertexBuffer = r.vertexBufferAddress;
push_constants.objectID = r.objectID; push_constants.objectID = r.objectID;

View File

@@ -19,6 +19,22 @@
#include "core/types.h" #include "core/types.h"
#include "core/config.h" #include "core/config.h"
namespace
{
struct ShadowPushConstants
{
glm::mat4 render_matrix;
VkDeviceAddress vertexBuffer;
uint32_t objectID;
uint32_t cascadeIndex;
};
static_assert(offsetof(ShadowPushConstants, render_matrix) == 0);
static_assert(offsetof(ShadowPushConstants, vertexBuffer) == 64);
static_assert(offsetof(ShadowPushConstants, objectID) == 72);
static_assert(offsetof(ShadowPushConstants, cascadeIndex) == 76);
static_assert(sizeof(ShadowPushConstants) == 80);
} // namespace
void ShadowPass::init(EngineContext *context) void ShadowPass::init(EngineContext *context)
{ {
_context = context; _context = context;
@@ -29,10 +45,7 @@ void ShadowPass::init(EngineContext *context)
// Keep push constants matching current shader layout for now // Keep push constants matching current shader layout for now
VkPushConstantRange pc{}; VkPushConstantRange pc{};
pc.offset = 0; pc.offset = 0;
// Push constants layout in shadow.vert is GPUDrawPushConstants + cascade index, rounded to 16 bytes pc.size = static_cast<uint32_t>(sizeof(ShadowPushConstants));
const uint32_t pcRaw = static_cast<uint32_t>(sizeof(GPUDrawPushConstants) + sizeof(uint32_t));
const uint32_t pcAligned = (pcRaw + 15u) & ~15u; // 16-byte alignment to match std430 expectations
pc.size = pcAligned;
pc.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; pc.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
GraphicsPipelineCreateInfo info{}; GraphicsPipelineCreateInfo info{};
@@ -180,12 +193,6 @@ void ShadowPass::draw_shadow(VkCommandBuffer cmd,
const DrawContext &dc = ctxLocal->getMainDrawContext(); const DrawContext &dc = ctxLocal->getMainDrawContext();
VkBuffer lastIndexBuffer = VK_NULL_HANDLE; VkBuffer lastIndexBuffer = VK_NULL_HANDLE;
struct ShadowPC
{
GPUDrawPushConstants draw;
uint32_t cascadeIndex;
};
for (const auto &r : dc.OpaqueSurfaces) for (const auto &r : dc.OpaqueSurfaces)
{ {
if (r.indexBuffer != lastIndexBuffer) if (r.indexBuffer != lastIndexBuffer)
@@ -194,12 +201,12 @@ void ShadowPass::draw_shadow(VkCommandBuffer cmd,
vkCmdBindIndexBuffer(cmd, r.indexBuffer, 0, VK_INDEX_TYPE_UINT32); vkCmdBindIndexBuffer(cmd, r.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
} }
ShadowPC spc{}; ShadowPushConstants spc{};
spc.draw.worldMatrix = r.transform; spc.render_matrix = r.transform;
spc.draw.vertexBuffer = r.vertexBufferAddress; spc.vertexBuffer = r.vertexBufferAddress;
spc.draw.objectID = r.objectID; spc.objectID = r.objectID;
spc.cascadeIndex = cascadeIndex; spc.cascadeIndex = cascadeIndex;
vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(ShadowPC), &spc); vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(ShadowPushConstants), &spc);
vkCmdDrawIndexed(cmd, r.indexCount, 1, r.firstIndex, 0, 0); vkCmdDrawIndexed(cmd, r.indexCount, 1, r.firstIndex, 0, 0);
} }
} }

View File

@@ -195,6 +195,12 @@ void TransparentPass::draw_transparent(VkCommandBuffer cmd,
} }
GPUDrawPushConstants push{}; GPUDrawPushConstants push{};
push.worldMatrix = r.transform; push.worldMatrix = r.transform;
{
const glm::mat3 n = glm::transpose(glm::inverse(glm::mat3(r.transform)));
push.normalMatrix[0] = glm::vec4(n[0], 0.0f);
push.normalMatrix[1] = glm::vec4(n[1], 0.0f);
push.normalMatrix[2] = glm::vec4(n[2], 0.0f);
}
push.vertexBuffer = r.vertexBufferAddress; push.vertexBuffer = r.vertexBufferAddress;
push.objectID = r.objectID; push.objectID = r.objectID;
vkCmdPushConstants(cmd, r.material->pipeline->layout, vkCmdPushConstants(cmd, r.material->pipeline->layout,