bvle-voxels/shaders/voxelTopingBLASCS.hlsl
Samuel Bouchet 0d3f8200b4 Refactor: remove dead CPU/MDI paths, GPU BLAS compute, 30Hz animation
- Remove ~430 lines of dead CPU mesh, MDI, and GPU cull render paths
  (rebuildMegaBuffer, IndirectDrawArgs, drawCountBuffer, cullShader, etc.)
- Add voxelTopingBLASCS.hlsl compute shader replacing 196ms CPU loop
  for toping BLAS position extraction (<1ms on GPU)
- Reduce animation rate from 60Hz to 30Hz (halves CPU regen cost)
- Simplify render() to GPU mesh path only (no conditional branches)
- Remove benchmark state machine and stale mode strings
2026-03-31 01:43:53 +02:00

80 lines
2.9 KiB
HLSL

// BVLE Voxels - Toping BLAS Position Extraction Compute Shader
// Replaces the 196ms CPU loop that computed world-space toping positions.
// Reads vertex templates (t4) + instance positions (t5) + group table (t7),
// writes flat float3 positions (u0) for DXR BLAS construction.
//
// One thread per output vertex. Group table maps global vertex index to
// the correct (instance, local vertex) pair via prefix-sum offsets.
#include "voxelCommon.hlsli"
// Toping mesh vertex template, one element of the topingVertices buffer.
// Two float3 fields (24 bytes); the layout must match the C++ TopingVertex struct.
struct TopingVtx {
float3 position; // local to voxel [0,1]^3; main() adds the instance world position to it
float3 normal; // unused here, but struct must match
};
// Toping instance, one element of the topingInstances buffer.
// Holds only the world position (one float3, 12 bytes).
struct TopingInst {
float3 worldPos; // offset added to every template vertex position of this instance
};
// Draw group descriptor for BLAS extraction, one element of the topingGroups buffer.
// Five uints (20 bytes); the layout must match the C++ TopingBLASGroupGPU struct.
// main() uses these fields to translate a global output vertex index into an
// (instance, template vertex) pair.
struct TopingBLASGroup {
uint globalVertexOffset; // prefix sum: first global vertex index for this group
uint vertexTemplateOffset; // offset into topingVertices (t4)
uint vertexCount; // vertices per instance (mesh slice count); used as a divisor in main()
uint instanceOffset; // offset into topingInstances (t5)
uint instanceCount; // number of instances in this group (not read by this shader)
};
// Inputs: vertex templates (t4), per-instance world positions (t5) and the
// group table (t7) that maps global vertex indices onto the first two.
StructuredBuffer<TopingVtx> topingVertices : register(t4);
StructuredBuffer<TopingInst> topingInstances : register(t5);
StructuredBuffer<TopingBLASGroup> topingGroups : register(t7);
// Output: raw float3 positions (12 bytes each), tightly packed; vertex i is
// written at byte offset i * 12.
RWByteAddressBuffer blasPositions : register(u0);
// Push constants (b999).
// Twelve uints (48 bytes) in total; only the first two are read by this shader.
// NOTE(review): the ten pad words presumably keep the block the same size as
// the push-constant layout used by the other voxel shaders -- confirm against
// the host-side struct / VOXEL_ROOTSIG.
struct TopingBLASPush {
uint totalVertices; // number of output vertices; threads at or beyond this index exit early
uint groupCount; // number of valid entries in topingGroups
uint pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9;
};
[[vk::push_constant]] ConstantBuffer<TopingBLASPush> push : register(b999);
// Writes one float3 into blasPositions as three consecutive 32-bit words
// (x, y, z) starting at byteOffset.
//   byteOffset - destination offset in bytes; must be a multiple of 4
//                (callers pass vertexIndex * 12).
//   v          - value to store; its bit pattern is written unchanged.
// Uses the built-in three-component store instead of three scalar stores;
// the bytes written are identical.
void storeFloat3(uint byteOffset, float3 v) {
    blasPositions.Store3(byteOffset, asuint(v));
}
// Compute entry point: one thread per output BLAS vertex.
//
// Each thread locates the draw group containing its global vertex index,
// splits the index into (instance, template vertex) within that group, and
// writes instance.worldPos + template.position to blasPositions at
// globalIdx * 12.
//
// Threads whose index is >= push.totalVertices do nothing. Threads also exit
// without writing when the group table is empty or the selected group has
// vertexCount == 0, since the index math below would otherwise divide by zero.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(64, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID) {
    uint globalIdx = DTid.x;

    // Out-of-range threads (tail of the last thread group) and an empty group
    // table have nothing to produce.
    if (globalIdx >= push.totalVertices || push.groupCount == 0) return;

    // Find which group this vertex belongs to (linear scan, max ~32 groups).
    // Groups are ordered by globalVertexOffset, so the owner is the last group
    // whose offset is <= globalIdx.
    uint groupIdx = 0;
    for (uint g = 1; g < push.groupCount; g++) {
        if (globalIdx < topingGroups[g].globalVertexOffset)
            break;
        groupIdx = g;
    }
    TopingBLASGroup grp = topingGroups[groupIdx];

    // A malformed group with no vertices per instance would make the divide
    // and modulo below undefined; skip instead of fetching garbage indices.
    if (grp.vertexCount == 0) return;

    // Map global vertex to (instance, local vertex) within this group.
    // Vertices are laid out instance-major: all vertices of instance 0, then
    // all vertices of instance 1, and so on.
    uint localIdx = globalIdx - grp.globalVertexOffset;
    uint instanceIdx = grp.instanceOffset + localIdx / grp.vertexCount;
    uint vertexIdx = grp.vertexTemplateOffset + localIdx % grp.vertexCount;

    TopingVtx vtx = topingVertices[vertexIdx];
    TopingInst inst = topingInstances[instanceIdx];

    // Template positions are voxel-local, so a translation is all that is needed.
    float3 worldPos = inst.worldPos + vtx.position;
    storeFloat3(globalIdx * 12, worldPos);
}