bvle-voxels/shaders/voxelBLASExtractCS.hlsl
Samuel Bouchet 7f36bdae38 Phase 6.1: RT infrastructure — MRT normals + BLAS/TLAS build
- Normal render target (R16G16B16A16_SNORM) as MRT SV_TARGET1 in all 3 pixel
  shaders (voxelPS, voxelTopingPS, voxelSmoothPS) for future RT shadow/AO
- BLAS extraction compute shader (voxelBLASExtractCS.hlsl): converts PackedQuad
  StructuredBuffer to float3 position buffer for DXR BLAS input
- Blocky BLAS: single BLAS from all GPU-meshed quads (~1.5M triangles)
- Smooth BLAS: single BLAS from smooth vertex buffer directly
- TLAS: 2 instances (blocky + smooth), identity transforms, CreateBuffer2 with
  callback to avoid UpdateBuffer on RAY_TRACING flagged buffers
- Fix: Wicked always accesses index_buffer in CreateRaytracingAccelerationStructure
  via to_internal() even for non-indexed geometry — provide dummy valid buffer
2026-03-28 14:48:11 +01:00

119 lines
4.1 KiB
HLSL
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// BVLE Voxels - BLAS Position Extraction Compute Shader (Phase 6.1)
// Reads GPU-generated PackedQuads and writes flat float3 positions
// suitable for DXR BLAS construction (non-indexed, 6 vertices per quad).
//
// Uses the exact same unpack + winding logic as voxelVS.hlsl.
// Output is RWByteAddressBuffer (raw buffer) for BLAS vertex compatibility.
#include "voxelCommon.hlsli"
// One packed quad as stored in quadBuffer.
// The two 32-bit words are decoded by unpackQuad() below:
//   data.x (low word)  : px, py, pz, w, h (6 bits each) + low 2 bits of face
//   data.y (high word) : bit 0 = high bit of face, bits [27:17] = chunk index
// Other bits of data.y are not read by this shader.
struct PackedQuad {
uint2 data; // 8 bytes = 2 x uint32
};
// Input: packed quads, indexed by the dispatch thread id (one thread per quad).
StructuredBuffer<PackedQuad> quadBuffer : register(t0);
// Input: per-chunk data, indexed by the chunk index decoded from each quad.
// Only info.worldPos.xyz is read here (added to the chunk-local quad position).
StructuredBuffer<GPUChunkInfo> chunkInfoBuffer : register(t2);
// Output: raw float3 positions (12 bytes each), 6 per quad
// => quad i occupies bytes [i * 72, i * 72 + 72). The buffer must therefore
// hold at least push.quadCount * 72 bytes.
RWByteAddressBuffer blasPositions : register(u0);
// Push constants (b999)
// quadCount : number of valid entries in quadBuffer; threads whose index is
//             >= quadCount exit without writing anything.
// pad0..10  : unused by this shader. They bring the struct to 12 x uint32
//             (48 bytes). NOTE(review): presumably this matches a fixed
//             push-constant block size used by the host code — confirm.
struct BLASPush {
uint quadCount;
uint pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10;
};
[[vk::push_constant]] ConstantBuffer<BLASPush> push : register(b999);
// ── Face direction tables (SAME as voxelVS.hlsl) ───────────────────
// Outward unit normal for each face index. Even indices point along the
// positive axis, odd indices along the negative axis.
static const float3 faceNormals[6] = {
    float3( 1, 0, 0),   // face 0 : +X
    float3(-1, 0, 0),   // face 1 : -X
    float3( 0, 1, 0),   // face 2 : +Y
    float3( 0,-1, 0),   // face 3 : -Y
    float3( 0, 0, 1),   // face 4 : +Z
    float3( 0, 0,-1)    // face 5 : -Z
};
// In-plane "U" axis per face index: the direction along which the quad
// width (w) extends. X faces use +Y; Y and Z faces use +X.
static const float3 faceU[6] = {
    float3(0, 1, 0),   // face 0 (+X)
    float3(0, 1, 0),   // face 1 (-X)
    float3(1, 0, 0),   // face 2 (+Y)
    float3(1, 0, 0),   // face 3 (-Y)
    float3(1, 0, 0),   // face 4 (+Z)
    float3(1, 0, 0)    // face 5 (-Z)
};
// In-plane "V" axis per face index: the direction along which the quad
// height (h) extends. X and Y faces use +Z; Z faces use +Y.
static const float3 faceV[6] = {
    float3(0, 0, 1),   // face 0 (+X)
    float3(0, 0, 1),   // face 1 (-X)
    float3(0, 0, 1),   // face 2 (+Y)
    float3(0, 0, 1),   // face 3 (-Y)
    float3(0, 1, 0),   // face 4 (+Z)
    float3(0, 1, 0)    // face 5 (-Z)
};
// Helper: write one float3 (12 bytes, tightly packed x, y, z) into the raw
// output buffer.
//   byteOffset : destination offset in bytes; the components land at
//                byteOffset + 0, + 4 and + 8.
//   v          : value to store; the float bit patterns are written unchanged.
void storeFloat3(uint byteOffset, float3 v) {
    // A single 3-component raw store writes the same three consecutive dwords
    // as three scalar stores would.
    blasPositions.Store3(byteOffset, asuint(v));
}
// ── Quad unpacking (SAME as voxelVS.hlsl + chunkIndex from GPU mesh bits) ──
// Splits the two packed words of a quad into its fields.
//   raw      : raw.x = low word, raw.y = high word
//   px/py/pz : 6-bit position components         (low word bits [5:0], [11:6], [17:12])
//   w, h     : 6-bit quad extents                (low word bits [23:18], [29:24])
//   face     : 3-bit face index, range 0..7      (low word bits [31:30] + high word bit 0)
//   chunkIdx : 11-bit chunk index, range 0..2047 (high word bits [27:17])
void unpackQuad(uint2 raw, out uint px, out uint py, out uint pz,
                out uint w, out uint h, out uint face, out uint chunkIdx)
{
    const uint kSixBitMask = 0x3F;
    const uint wordLo = raw.x;
    const uint wordHi = raw.y;

    // Five consecutive 6-bit fields fill the bottom 30 bits of the low word.
    px = wordLo & kSixBitMask;
    py = (wordLo >> 6) & kSixBitMask;
    pz = (wordLo >> 12) & kSixBitMask;
    w = (wordLo >> 18) & kSixBitMask;
    h = (wordLo >> 24) & kSixBitMask;

    // The face index straddles the word boundary: two bits from the top of the
    // low word, one bit from the bottom of the high word.
    const uint faceLowBits = (wordLo >> 30) & 0x3;
    const uint faceHighBit = wordHi & 0x1;
    face = faceLowBits | (faceHighBit << 2);

    // GPU mesh path: chunkIndex in bits [27:17] of hi word
    chunkIdx = (wordHi >> 17) & 0x7FF;
}
// Fills the 72-byte output slot of one quad with NaN positions.
// Used for quads whose decoded face index is not a valid table index, so the
// slot never holds stale or garbage geometry. The raw quiet-NaN bit pattern is
// stored directly as uints so no float optimisation can alter it.
// NOTE(review): relies on the acceleration-structure builder treating
// triangles with NaN vertex X components as inactive, as described in the DXR
// and Vulkan ray tracing specifications — confirm for the target backend.
void blasStoreInactiveQuad(uint byteBase) {
    const uint kQuietNaNBits = 0x7FC00000u;
    const uint3 nanVertex = uint3(kQuietNaNBits, kQuietNaNBits, kQuietNaNBits);
    [unroll]
    for (uint v = 0; v < 6; ++v) {
        blasPositions.Store3(byteBase + v * 12, nanVertex);
    }
}

// Entry point: one thread per quad.
// Reads quadBuffer[DTid.x], reconstructs the quad's four corners in world
// space (chunk-local position + info.worldPos.xyz) and writes two triangles
// (6 non-indexed float3 vertices, 72 bytes) to blasPositions at
// byte offset DTid.x * 72.
// Threads with DTid.x >= push.quadCount write nothing.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(64, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID) {
    const uint quadIdx = DTid.x;
    if (quadIdx >= push.quadCount) return;

    // 6 vertices × 12 bytes (float3) = 72 bytes per quad
    const uint byteBase = quadIdx * 72;

    PackedQuad packed = quadBuffer[quadIdx];
    uint px, py, pz, w, h, face, chunkIdx;
    unpackQuad(packed.data, px, py, pz, w, h, face, chunkIdx);

    // The face field is 3 bits wide (0..7) but the direction tables only have
    // 6 entries. Indexing them with 6 or 7 would read out of bounds, so emit
    // inactive geometry for such quads instead.
    if (face >= 6) {
        blasStoreInactiveQuad(byteBase);
        return;
    }

    GPUChunkInfo info = chunkInfoBuffer[chunkIdx];

    // ── Compute 4 corner world positions (same math as voxelVS.hlsl) ──
    const float3 basePos = float3((float)px, (float)py, (float)pz);
    const float3 normal = faceNormals[face];
    const float3 uAxis = faceU[face];
    const float3 vAxis = faceV[face];

    // Positive faces (even index) sit on the far side of the voxel: offset by
    // 1 in the normal direction. Negative faces stay on the base plane.
    const float3 faceOffset = ((face & 1u) == 0u) ? normal : float3(0, 0, 0);
    const float3 origin = basePos + faceOffset + info.worldPos.xyz;

    const float3 p00 = origin;
    const float3 p10 = origin + uAxis * (float)w;
    const float3 p01 = origin + vAxis * (float)h;
    const float3 p11 = p10 + vAxis * (float)h;

    // ── Winding: must match voxelVS.hlsl ──
    // CW for faces 0,3,4 ; CCW for faces 1,2,5
    //   CCW: (0,0)(1,0)(0,1), (0,1)(1,0)(1,1)
    //   CW : (0,0)(0,1)(1,0), (1,0)(0,1)(1,1)
    // The two orders differ only by swapping the p10 / p01 corners, so choose
    // which corner comes first and share a single store sequence.
    const bool useCCW = (face == 1 || face == 2 || face == 5);
    const float3 cornerA = useCCW ? p10 : p01;
    const float3 cornerB = useCCW ? p01 : p10;

    storeFloat3(byteBase + 0, p00);
    storeFloat3(byteBase + 12, cornerA);
    storeFloat3(byteBase + 24, cornerB);
    storeFloat3(byteBase + 36, cornerB);
    storeFloat3(byteBase + 48, cornerA);
    storeFloat3(byteBase + 60, p11);
}