- Remove ~430 lines of dead CPU mesh, MDI, and GPU cull render paths (rebuildMegaBuffer, IndirectDrawArgs, drawCountBuffer, cullShader, etc.)
- Add voxelTopingBLASCS.hlsl compute shader replacing the 196ms CPU loop for toping BLAS position extraction (<1ms on GPU)
- Reduce animation rate from 60Hz to 30Hz (halves CPU regen cost)
- Simplify render() to GPU mesh path only (no conditional branches)
- Remove benchmark state machine and stale mode strings
80 lines
2.9 KiB
HLSL
80 lines
2.9 KiB
HLSL
// BVLE Voxels - Toping BLAS Position Extraction Compute Shader
|
|
// Replaces the 196ms CPU loop that computed world-space toping positions.
|
|
// Reads vertex templates (t4) + instance positions (t5) + group table (t7),
|
|
// writes flat float3 positions (u0) for DXR BLAS construction.
|
|
//
|
|
// One thread per output vertex. Group table maps global vertex index to
|
|
// the correct (instance, local vertex) pair via prefix-sum offsets.
|
|
|
|
#include "voxelCommon.hlsli"
|
|
|
|
// One vertex of a toping mesh template.
// The layout mirrors the C++ TopingVertex: two float3 members, 24 bytes total.
struct TopingVtx {
    // Position relative to the owning voxel, in the [0,1]^3 cube.
    float3 position;
    // Never read by this shader; kept so the element stride matches the C++ struct.
    float3 normal;
};
|
|
|
|
// Per-instance record for a toping: a single float3 (12 bytes) holding the
// world-space position that is added to each template vertex.
struct TopingInst {
    float3 worldPos;
};
|
|
|
|
// One entry of the group table used for BLAS position extraction.
// Five uints (20 bytes); the layout mirrors the C++ TopingBLASGroupGPU.
struct TopingBLASGroup {
    // Prefix sum: global index of the first output vertex owned by this group.
    uint globalVertexOffset;
    // Index of the group's first template vertex inside topingVertices (t4).
    uint vertexTemplateOffset;
    // Vertices emitted per instance (size of the mesh slice).
    uint vertexCount;
    // Index of the group's first instance inside topingInstances (t5).
    uint instanceOffset;
    // Instances in this group. Not read by this shader.
    uint instanceCount;
};
|
|
|
|
// Read-only inputs.
StructuredBuffer<TopingVtx>       topingVertices  : register(t4);  // mesh vertex templates
StructuredBuffer<TopingInst>      topingInstances : register(t5);  // instance world positions
StructuredBuffer<TopingBLASGroup> topingGroups    : register(t7);  // group table (prefix-sum offsets)

// Output: float3 positions written as raw bytes, 12 bytes per vertex,
// addressed by global vertex index.
RWByteAddressBuffer blasPositions : register(u0);
|
|
|
|
// Push-constant block, bound at b999. Twelve uints (48 bytes) in total.
struct TopingBLASPush {
    // Number of output vertices; threads whose index is at or past this exit early.
    uint totalVertices;
    // Number of topingGroups entries scanned by main().
    uint groupCount;
    // Unused words. Presumably they pad the block to a size shared with other
    // shaders using the same root signature -- TODO confirm against the C++ side.
    uint pad0, pad1, pad2, pad3, pad4;
    uint pad5, pad6, pad7, pad8, pad9;
};

[[vk::push_constant]] ConstantBuffer<TopingBLASPush> push : register(b999);
|
|
|
|
// Writes the three components of `v` into blasPositions as 12 consecutive
// bytes starting at `byteOffset`.
//
//   byteOffset - destination offset in bytes; callers pass a multiple of 12,
//                so it is always 4-byte aligned.
//   v          - value to store; the float bit patterns are written unchanged.
//
// A single Store3 replaces three scalar Store calls; the bytes written are
// the same (x at +0, y at +4, z at +8).
void storeFloat3(uint byteOffset, float3 v) {
    blasPositions.Store3(byteOffset, asuint(v));
}
|
|
|
|
// Compute entry point: each thread produces one world-space BLAS vertex.
//
// The thread's global vertex index is mapped to a group through the
// prefix-sum offsets in topingGroups, then split into an (instance, template
// vertex) pair inside that group. The output position is the instance's world
// position plus the template vertex's voxel-local position, stored at
// globalIdx * 12 bytes in blasPositions.
//
// Threads write nothing when:
//   - their index is at or past push.totalVertices (thread groups are 64 wide,
//     so a dispatch can cover more threads than there are vertices),
//   - the group table is empty, or
//   - the selected group reports zero vertices per instance.
//
// NOTE(review): grp.instanceCount is not checked here; the group table is
// assumed to be consistent with push.totalVertices -- confirm against the
// C++ code that fills it.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(64, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID) {
    const uint globalIdx = DTid.x;

    // Nothing to do past the last vertex, and with no groups there is no
    // valid topingGroups[0] to fall back on.
    if (globalIdx >= push.totalVertices || push.groupCount == 0) return;

    // Pick the last group whose first global vertex is <= globalIdx.
    // Linear scan: the table is expected to be small (about 32 entries at most).
    uint groupIdx = 0;
    for (uint g = 1; g < push.groupCount; g++) {
        if (globalIdx < topingGroups[g].globalVertexOffset) break;
        groupIdx = g;
    }

    const TopingBLASGroup grp = topingGroups[groupIdx];

    // An integer divide/modulo by zero would not produce a usable index, so
    // skip groups that report no vertices instead of reading garbage.
    if (grp.vertexCount == 0) return;

    // Split the position inside the group into instance and template vertex.
    const uint localIdx    = globalIdx - grp.globalVertexOffset;
    const uint instanceIdx = grp.instanceOffset + localIdx / grp.vertexCount;
    const uint vertexIdx   = grp.vertexTemplateOffset + localIdx % grp.vertexCount;

    const TopingVtx  vtx  = topingVertices[vertexIdx];
    const TopingInst inst = topingInstances[instanceIdx];

    // Voxel-local template position translated by the instance's world position.
    const float3 worldPos = inst.worldPos + vtx.position;

    // 12 bytes (one float3) per output vertex.
    storeFloat3(globalIdx * 12, worldPos);
}
|