Files
QuaternionEngine/shaders/particles_sort_blocks.comp
2025-12-18 17:32:30 +09:00

102 lines
2.3 KiB
Plaintext

#version 450
// Each system dispatch sorts up to 512 blocks of 256 particles by max view-space depth.
// One invocation handles one block slot: it computes that block's key, then all
// invocations of the workgroup cooperate in a shared-memory bitonic sort.
// The workgroup width (512) matches MAX_BLOCKS and the size of the shared arrays.
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
// One particle record as laid out in the pool buffer (four vec4 members).
struct Particle
{
// xyz: particle position; main() transforms it by pc.view to obtain depth.
// w: presumably the particle's age — not read by this shader, TODO confirm.
vec4 pos_age;
// Not read by this shader. Presumably xyz = velocity, w = lifetime — TODO confirm.
vec4 vel_life;
// Not read by this shader. Presumably an RGBA color — TODO confirm.
vec4 color;
// Not read by this shader; its contents are not evident from this file.
vec4 misc;
};
// Read-only storage buffer holding the particle pool (set 0, binding 0).
// main() indexes it with absolute particle indices starting at pc.header.x.
layout(std430, set = 0, binding = 0) readonly buffer ParticlePool
{
Particle particles[];
} pool;
// Write-only storage buffer receiving the sort result (set 0, binding 1).
// After the dispatch, blocks[i] for i < blockCount holds the index of the i-th
// block in sorted order (farthest first). A block index b is relative to the
// system: it refers to particles starting at pc.header.x + b * BLOCK_SIZE.
layout(std430, set = 0, binding = 1) writeonly buffer SortedBlocks
{
uint blocks[];
} outBlocks;
// Per-dispatch parameters supplied through push constants.
layout(push_constant) uniform Push
{
// x = index of the first particle to consider in the pool ("base"),
// y = number of particles to consider ("count"); z and w are not read by this shader.
uvec4 header; // x=base, y=count
// Matrix applied to particle positions to obtain view-space coordinates;
// presumably world-to-view — the space positions are stored in is not shown here.
mat4 view;
} pc;
// Number of consecutive particles grouped into one sortable block.
const uint BLOCK_SIZE = 256u;
// Maximum number of blocks sorted per dispatch. The bitonic network in main()
// doubles its stage size up to this value, so it is expected to be a power of
// two and to match both local_size_x and the shared array lengths below.
const uint MAX_BLOCKS = 512u;
// Sort key per slot: negated max depth of the block, or a large padding value
// for slots beyond the active block count.
shared float s_key[512];
// Block index carried along with its key while the sort permutes slots.
shared uint s_block[512];
// Entry point; each invocation owns one block slot (tid).
//
// 1. Invocations with tid < blockCount scan their block (up to BLOCK_SIZE
//    particles) and use the negated maximum depth as the sort key. Remaining
//    slots receive a padding key that sorts after every real block.
// 2. All MAX_BLOCKS (key, block index) pairs are sorted ascending in shared
//    memory with a bitonic network, so the farthest block ends up first.
// 3. The first blockCount sorted block indices are written to outBlocks.
void main()
{
    // Key given to unused slots so they end up behind every real block.
    const float PAD_KEY = 1e20;
    // Largest key a real block may carry; strictly below PAD_KEY.
    const float MAX_VALID_KEY = 9.0e19;

    uint tid = gl_LocalInvocationID.x;
    uint count = pc.header.y;

    // Number of blocks needed for `count` particles, capped at what one dispatch can sort.
    uint blockCount = (count + BLOCK_SIZE - 1u) / BLOCK_SIZE;
    blockCount = min(blockCount, MAX_BLOCKS);

    float key = PAD_KEY;
    if (tid < blockCount)
    {
        uint blockStart = pc.header.x + tid * BLOCK_SIZE;
        // The last block may be partial; tid < blockCount guarantees tid * BLOCK_SIZE < count.
        uint localCount = min(BLOCK_SIZE, count - tid * BLOCK_SIZE);

        float maxDepth = -1e20;
        for (uint i = 0u; i < localCount; ++i)
        {
            uint idx = blockStart + i;
            vec4 viewPos = pc.view * vec4(pool.particles[idx].pos_age.xyz, 1.0);
            // Depth is taken as -z, i.e. this assumes the view looks down -Z — confirm against the host code.
            float depth = -viewPos.z;
            maxDepth = max(maxDepth, depth);
        }

        // Sort ascending by -depth => farthest first.
        key = -maxDepth;

        // Keep real keys NaN-free and strictly below PAD_KEY. Without this, a block
        // whose positions are NaN/Inf (or whose max never rose above its -1e20 seed)
        // could tie with or sort behind a padding slot, letting an out-of-range block
        // index reach the output while a real block is dropped. NaN keys would also
        // make every comparison in the network false. The mapping is monotone, so the
        // relative order of sane keys is unchanged.
        if (isnan(key) || !(key < MAX_VALID_KEY))
        {
            key = MAX_VALID_KEY;
        }
    }

    s_key[tid] = key;
    s_block[tid] = tid;
    barrier();

    // Bitonic sort ascending for 512 elements.
    // k: size of the bitonic sequences being merged; j: compare distance within a merge step.
    for (uint k = 2u; k <= MAX_BLOCKS; k <<= 1u)
    {
        for (uint j = (k >> 1u); j > 0u; j >>= 1u)
        {
            uint ixj = tid ^ j;
            // Only the lower-indexed invocation of each pair performs the
            // compare-exchange, so every pair is touched by exactly one invocation.
            if (ixj > tid)
            {
                bool ascending = ((tid & k) == 0u);
                float a = s_key[tid];
                float b = s_key[ixj];
                uint ai = s_block[tid];
                uint bi = s_block[ixj];
                bool swap = (ascending && (a > b)) || (!ascending && (a < b));
                if (swap)
                {
                    s_key[tid] = b;
                    s_key[ixj] = a;
                    s_block[tid] = bi;
                    s_block[ixj] = ai;
                }
            }
            // Placed outside the branch so every invocation reaches it; separates
            // this step's shared-memory writes from the next step's reads.
            barrier();
        }
    }

    // Padding slots sorted to the tail; only the leading blockCount entries are real blocks.
    if (tid < blockCount)
    {
        outBlocks.blocks[tid] = s_block[tid];
    }
}