// File: bvle-voxels/shaders/voxelBLASExtractCS.hlsl


// BVLE Voxels - BLAS Position Extraction Compute Shader (Phase 6.1)
// Reads GPU-generated PackedQuads and writes flat float3 positions
// suitable for DXR BLAS construction (non-indexed, 6 vertices per quad).
//
// Uses the exact same unpack + winding logic as voxelVS.hlsl.
// Output is RWByteAddressBuffer (raw buffer) for BLAS vertex compatibility.
#include "voxelCommon.hlsli"
// One packed quad record, as stored in quadBuffer: two 32-bit words.
// Bit layout as decoded by unpackQuad() in this file:
//   data.x [ 5: 0] px      data.x [11: 6] py      data.x [17:12] pz
//   data.x [23:18] w       data.x [29:24] h       data.x [31:30] face bits 1:0
//   data.y [0]     face bit 2
//   data.y [27:17] chunk index
// The remaining bits of data.y are not read by this shader.
struct PackedQuad {
uint2 data; // 8 bytes = 2 x uint32
};
// Input: packed quads, one element per quad; main() indexes it by dispatch thread ID.
StructuredBuffer<PackedQuad> quadBuffer : register(t0);
// Input: per-chunk info, indexed by the chunk index unpacked from each quad.
// Only worldPos.xyz is read in this shader. GPUChunkInfo is not declared in this
// file (presumably it comes from voxelCommon.hlsli - confirm).
StructuredBuffer<GPUChunkInfo> chunkInfoBuffer : register(t2);
// Output: raw float3 positions (12 bytes each), 6 per quad
// (72 bytes per quad; main() writes quad i starting at byte offset i * 72).
RWByteAddressBuffer blasPositions : register(u0);
// Push constants (b999)
//   quadCount    - number of quads to process; threads whose index is >= quadCount
//                  return immediately in main().
//   pad0..pad10  - never read by this shader; they bring the block to 12 uints (48 bytes).
// NOTE(review): presumably the padding matches a fixed push-constant size shared
// with other shaders - confirm against the host-side layout.
struct BLASPush {
uint quadCount;
uint pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10;
};
[[vk::push_constant]] ConstantBuffer<BLASPush> push : register(b999);
// ── Face direction tables (SAME as voxelVS.hlsl) ───────────────────
// Outward unit normal per face index:
//   0 = +X, 1 = -X, 2 = +Y, 3 = -Y, 4 = +Z, 5 = -Z.
// main() adds the normal to the quad origin only for even (positive-direction)
// faces. The table has exactly 6 entries, while the packed face field is 3 bits
// wide, so values 6 and 7 would index past the end.
static const float3 faceNormals[6] = {
float3( 1, 0, 0), float3(-1, 0, 0),
float3( 0, 1, 0), float3( 0,-1, 0),
float3( 0, 0, 1), float3( 0, 0,-1)
};
// In-plane U axis per face index: the direction along which main() extends a
// quad by its packed width w. X-facing quads (0, 1) use +Y; all others use +X.
static const float3 faceU[6] = {
float3(0, 1, 0), float3(0, 1, 0),
float3(1, 0, 0), float3(1, 0, 0),
float3(1, 0, 0), float3(1, 0, 0)
};
// In-plane V axis per face index: the direction along which main() extends a
// quad by its packed height h. Z-facing quads (4, 5) use +Y; all others use +Z.
static const float3 faceV[6] = {
float3(0, 0, 1), float3(0, 0, 1),
float3(0, 0, 1), float3(0, 0, 1),
float3(0, 1, 0), float3(0, 1, 0)
};
// Helper: store one float3 (12 bytes) at a byte offset in the raw output buffer.
//   byteOffset - destination offset in bytes. Raw-buffer addresses must be a
//                multiple of 4; main() always passes a multiple of 12.
//   v          - position to write; x, y, z land at +0, +4, +8 respectively.
// The float bit patterns are written unchanged (asuint is a bit cast, not a
// numeric conversion). A single Store3 replaces three scalar stores and writes
// the same three dwords.
void storeFloat3(uint byteOffset, float3 v) {
    blasPositions.Store3(byteOffset, asuint(v));
}
// ── Quad unpacking (SAME as voxelVS.hlsl + chunkIndex from GPU mesh bits) ──
// Splits the two packed words of a quad into its fields.
//   raw      - raw.x is the low word, raw.y the high word
//   px/py/pz - 6-bit cell coordinates, low word bits [5:0], [11:6], [17:12]
//   w, h     - 6-bit quad extents, low word bits [23:18], [29:24]
//   face     - 3-bit face index: low word bits [31:30] plus high word bit 0 as bit 2
//   chunkIdx - 11-bit chunk index, high word bits [27:17]
void unpackQuad(uint2 raw, out uint px, out uint py, out uint pz,
                out uint w, out uint h, out uint face, out uint chunkIdx)
{
    const uint kSixBits = 0x3F;

    // Position: three consecutive 6-bit fields starting at bit 0 of the low word.
    const uint3 cell = (raw.xxx >> uint3(0, 6, 12)) & kSixBits;
    px = cell.x;
    py = cell.y;
    pz = cell.z;

    // Extents: the next two 6-bit fields of the low word.
    const uint2 extent = (raw.xx >> uint2(18, 24)) & kSixBits;
    w = extent.x;
    h = extent.y;

    // Face index straddles the word boundary: two bits from the top of the low
    // word, one bit from the bottom of the high word.
    const uint faceLowBits = (raw.x >> 30) & 0x3;
    const uint faceHighBit = (raw.y & 0x1) << 2;
    face = faceLowBits | faceHighBit;

    // GPU mesh path: chunkIndex in bits [27:17] of hi word
    chunkIdx = (raw.y >> 17) & 0x7FF;
}
// Entry point: one thread per packed quad, 64 threads per group.
// Each thread unpacks its quad, builds the four corner positions in world space
// and writes two triangles (6 float3 vertices, 72 bytes) to blasPositions at
// byte offset quadIdx * 72.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(64, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID) {
    const uint quadIdx = DTid.x;

    // Threads beyond the quad count (tail of the last group) have nothing to do.
    if (quadIdx >= push.quadCount) {
        return;
    }

    uint cellX, cellY, cellZ, spanU, spanV, face, chunkIdx;
    unpackQuad(quadBuffer[quadIdx].data, cellX, cellY, cellZ, spanU, spanV, face, chunkIdx);

    const float3 chunkWorldPos = chunkInfoBuffer[chunkIdx].worldPos.xyz;

    // ── Corner positions (same math as voxelVS.hlsl) ──
    // Start at the cell position; even-numbered (positive-direction) faces are
    // shifted one unit along their normal. The chunk offset is added last.
    float3 origin = float3((float)cellX, (float)cellY, (float)cellZ);
    if ((face & 1u) == 0u) {
        origin += faceNormals[face];
    }
    origin += chunkWorldPos;

    // Quad edges: width along the face's U axis, height along its V axis.
    const float3 edgeU = faceU[face] * (float)spanU;
    const float3 edgeV = faceV[face] * (float)spanV;

    const float3 p00 = origin;
    const float3 p10 = origin + edgeU;
    const float3 p01 = origin + edgeV;
    const float3 p11 = p10 + edgeV;

    // ── Winding: must match voxelVS.hlsl ──
    // CW for faces 0,3,4 ; CCW for faces 1,2,5.
    // The two orders differ only in which of p10/p01 comes first:
    //   CCW: (0,0)(1,0)(0,1), (0,1)(1,0)(1,1)
    //   CW : (0,0)(0,1)(1,0), (1,0)(0,1)(1,1)
    const bool useCCW = (face == 1 || face == 2 || face == 5);
    const float3 second = useCCW ? p10 : p01;
    const float3 third  = useCCW ? p01 : p10;

    float3 verts[6] = { p00, second, third, third, second, p11 };

    // 6 vertices × 12 bytes (float3) = 72 bytes per quad
    const uint byteBase = quadIdx * 72;
    [unroll]
    for (uint i = 0; i < 6; ++i) {
        storeFloat3(byteBase + i * 12, verts[i]);
    }
}