// BVLE Voxels - BLAS Position Extraction Compute Shader (Phase 6.1)
// Reads GPU-generated PackedQuads and writes flat float3 positions
// suitable for DXR BLAS construction (non-indexed, 6 vertices per quad).
//
// Uses the exact same unpack + winding logic as voxelVS.hlsl.
// Output is RWByteAddressBuffer (raw buffer) for BLAS vertex compatibility.

#include "voxelCommon.hlsli"

// One greedy-meshed quad, packed into two 32-bit words (see unpackQuad for
// the bit layout).
struct PackedQuad {
    uint2 data; // 8 bytes = 2 x uint32
};

// Input: packed quads, one element per quad.
StructuredBuffer<PackedQuad> quadBuffer : register(t0);
// Input: per-chunk data, indexed by the chunk index stored in each quad.
// GPUChunkInfo is not declared in this file (presumably voxelCommon.hlsli);
// only its worldPos.xyz member is read here.
StructuredBuffer<GPUChunkInfo> chunkInfoBuffer : register(t2);

// Output: raw float3 positions (12 bytes each), 6 per quad
RWByteAddressBuffer blasPositions : register(u0);

// Push constants (b999)
// Only quadCount is read by this shader; the 11 pad fields bring the struct
// to 12 uints (48 bytes).
// NOTE(review): the padding presumably matches a push-constant layout shared
// with other voxel shaders -- confirm against the host-side definition.
struct BLASPush {
    uint quadCount;
    uint pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10;
};
[[vk::push_constant]] ConstantBuffer<BLASPush> push : register(b999);

// ── Face direction tables (SAME as voxelVS.hlsl) ───────────────────
// Face index order: 0 = +X, 1 = -X, 2 = +Y, 3 = -Y, 4 = +Z, 5 = -Z.
static const float3 faceNormals[6] = {
    float3( 1,  0,  0),
    float3(-1,  0,  0),
    float3( 0,  1,  0),
    float3( 0, -1,  0),
    float3( 0,  0,  1),
    float3( 0,  0, -1)
};

// In-plane axis scaled by the quad width (w).
static const float3 faceU[6] = {
    float3(0, 1, 0),
    float3(0, 1, 0),
    float3(1, 0, 0),
    float3(1, 0, 0),
    float3(1, 0, 0),
    float3(1, 0, 0)
};

// In-plane axis scaled by the quad height (h).
static const float3 faceV[6] = {
    float3(0, 0, 1),
    float3(0, 0, 1),
    float3(0, 0, 1),
    float3(0, 0, 1),
    float3(0, 1, 0),
    float3(0, 1, 0)
};

// Helper: store float3 at byte offset in raw buffer.
// Writes 12 bytes (x, y, z as raw 32-bit float bit patterns) starting at
// byteOffset. Every offset passed in by main() is a multiple of 12.
void storeFloat3(uint byteOffset, float3 v)
{
    blasPositions.Store3(byteOffset, asuint(v));
}

// ── Quad unpacking (SAME as voxelVS.hlsl + chunkIndex from GPU mesh bits) ──
// Bit layout decoded here:
//   raw.x [ 5: 0] px       raw.x [11: 6] py       raw.x [17:12] pz
//   raw.x [23:18] w        raw.x [29:24] h        raw.x [31:30] face bits 0-1
//   raw.y [    0] face bit 2
//   raw.y [27:17] chunkIdx (11 bits)
void unpackQuad(uint2 raw,
                out uint px, out uint py, out uint pz,
                out uint w, out uint h,
                out uint face, out uint chunkIdx)
{
    uint lo = raw.x;
    uint hi = raw.y;

    px = lo & 0x3F;
    py = (lo >> 6) & 0x3F;
    pz = (lo >> 12) & 0x3F;
    w  = (lo >> 18) & 0x3F;
    h  = (lo >> 24) & 0x3F;

    // The 3-bit face index straddles the two words.
    face = ((lo >> 30) & 0x3) | ((hi & 0x1) << 2);

    // GPU mesh path: chunkIndex in bits [27:17] of hi word
    chunkIdx = (hi >> 17) & 0x7FF;
}

// Entry point: one thread per quad. Thread DTid.x expands quad DTid.x into
// two triangles (6 float3 positions, 72 bytes) at byte offset DTid.x * 72 in
// blasPositions. Threads at or past push.quadCount write nothing.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(64, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    uint quadIdx = DTid.x;
    if (quadIdx >= push.quadCount)
        return;

    PackedQuad packed = quadBuffer[quadIdx];

    uint px, py, pz, w, h, face, chunkIdx;
    unpackQuad(packed.data, px, py, pz, w, h, face, chunkIdx);

    GPUChunkInfo info = chunkInfoBuffer[chunkIdx];

    // ── Compute 4 corner world positions (same math as voxelVS.hlsl) ──
    // NOTE(review): face is decoded from 3 bits, so 6 and 7 are representable
    // while the tables hold 6 entries; this assumes the mesher only emits
    // 0..5 -- confirm.
    float3 basePos = float3((float)px, (float)py, (float)pz);
    float3 normal  = faceNormals[face];
    float3 uAxis   = faceU[face];
    float3 vAxis   = faceV[face];

    // Positive faces: offset by 1 in normal direction
    float3 faceOffset = (face % 2 == 0) ? normal : float3(0, 0, 0);
    float3 origin = basePos + faceOffset + info.worldPos.xyz;

    float3 p00 = origin;
    float3 p10 = origin + uAxis * (float)w;
    float3 p01 = origin + vAxis * (float)h;
    float3 p11 = origin + uAxis * (float)w + vAxis * (float)h;

    // ── Winding: must match voxelVS.hlsl ──
    // CW for faces 0,3,4 ; CCW for faces 1,2,5
    bool useCCW = (face == 1 || face == 2 || face == 5);

    // 6 vertices × 12 bytes (float3) = 72 bytes per quad
    uint byteBase = quadIdx * 72;

    if (useCCW)
    {
        // CCW: (0,0)(1,0)(0,1), (0,1)(1,0)(1,1)
        storeFloat3(byteBase +  0, p00);
        storeFloat3(byteBase + 12, p10);
        storeFloat3(byteBase + 24, p01);
        storeFloat3(byteBase + 36, p01);
        storeFloat3(byteBase + 48, p10);
        storeFloat3(byteBase + 60, p11);
    }
    else
    {
        // CW: (0,0)(0,1)(1,0), (1,0)(0,1)(1,1)
        storeFloat3(byteBase +  0, p00);
        storeFloat3(byteBase + 12, p01);
        storeFloat3(byteBase + 24, p10);
        storeFloat3(byteBase + 36, p10);
        storeFloat3(byteBase + 48, p01);
        storeFloat3(byteBase + 60, p11);
    }
}