- Normal render target (R16G16B16A16_SNORM) as MRT SV_TARGET1 in all 3 pixel shaders (voxelPS, voxelTopingPS, voxelSmoothPS) for future RT shadow/AO
- BLAS extraction compute shader (voxelBLASExtractCS.hlsl): converts PackedQuad StructuredBuffer to float3 position buffer for DXR BLAS input
- Blocky BLAS: single BLAS from all GPU-meshed quads (~1.5M triangles)
- Smooth BLAS: single BLAS from smooth vertex buffer directly
- TLAS: 2 instances (blocky + smooth), identity transforms, CreateBuffer2 with callback to avoid UpdateBuffer on RAY_TRACING flagged buffers
- Fix: Wicked always accesses index_buffer in CreateRaytracingAccelerationStructure via to_internal() even for non-indexed geometry — provide dummy valid buffer
119 lines
4.1 KiB
HLSL
119 lines
4.1 KiB
HLSL
// BVLE Voxels - BLAS Position Extraction Compute Shader (Phase 6.1)
// Reads GPU-generated PackedQuads and writes flat float3 positions
// suitable for DXR BLAS construction (non-indexed, 6 vertices per quad).
//
// Uses the exact same unpack + winding logic as voxelVS.hlsl.
// Output is an RWByteAddressBuffer (raw buffer) for BLAS vertex compatibility.
|
||
|
||
#include "voxelCommon.hlsli"
|
||
|
||
// One GPU-meshed quad, bit-packed into two 32-bit words (8 bytes total).
//   data.x : position / size fields and the low two face bits
//   data.y : highest face bit (bit 0) and the chunk index (bits 27:17)
// See unpackQuad() for the exact field extraction.
struct PackedQuad {
    uint2 data;
};
|
||
|
||
// Inputs
//   t0 : packed quads, one element per quad
//   t2 : per-chunk info, indexed by the chunk index unpacked from each quad
StructuredBuffer<PackedQuad>   quadBuffer      : register(t0);
StructuredBuffer<GPUChunkInfo> chunkInfoBuffer : register(t2);

// Output
//   u0 : raw buffer of tightly packed float3 positions
//        (12 bytes per vertex, 6 vertices per quad)
RWByteAddressBuffer blasPositions : register(u0);
|
||
|
||
// Push constants, bound at b999.
// Only quadCount is read by this shader; the remaining 11 dwords are padding,
// bringing the block to 12 dwords in total.
// NOTE(review): presumably padded to match the push-constant size declared in
// VOXEL_ROOTSIG — confirm against the root signature.
struct BLASPush {
    uint quadCount;   // number of valid entries in quadBuffer
    uint pad0, pad1, pad2;
    uint pad3, pad4, pad5, pad6;
    uint pad7, pad8, pad9, pad10;
};
[[vk::push_constant]] ConstantBuffer<BLASPush> push : register(b999);
|
||
|
||
// ── Per-face direction tables (original note: SAME as voxelVS.hlsl) ──
// Axis-aligned unit normal for each face id 0..5.
static const float3 faceNormals[6] = {
    float3( 1,  0,  0),   // face 0 : +X
    float3(-1,  0,  0),   // face 1 : -X
    float3( 0,  1,  0),   // face 2 : +Y
    float3( 0, -1,  0),   // face 3 : -Y
    float3( 0,  0,  1),   // face 4 : +Z
    float3( 0,  0, -1)    // face 5 : -Z
};
|
||
|
||
// In-plane "U" edge direction for each face id 0..5.
// The X faces run U along +Y; the Y and Z faces run U along +X.
static const float3 faceU[6] = {
    float3(0, 1, 0),   // face 0 (+X)
    float3(0, 1, 0),   // face 1 (-X)
    float3(1, 0, 0),   // face 2 (+Y)
    float3(1, 0, 0),   // face 3 (-Y)
    float3(1, 0, 0),   // face 4 (+Z)
    float3(1, 0, 0)    // face 5 (-Z)
};
|
||
|
||
// In-plane "V" edge direction for each face id 0..5.
// The X and Y faces run V along +Z; the Z faces run V along +Y.
static const float3 faceV[6] = {
    float3(0, 0, 1),   // face 0 (+X)
    float3(0, 0, 1),   // face 1 (-X)
    float3(0, 0, 1),   // face 2 (+Y)
    float3(0, 0, 1),   // face 3 (-Y)
    float3(0, 1, 0),   // face 4 (+Z)
    float3(0, 1, 0)    // face 5 (-Z)
};
|
||
|
||
// Helper: write one float3 (12 bytes) into the raw output buffer.
//   byteOffset : destination byte address inside blasPositions; raw-buffer
//                stores require it to be a multiple of 4
//   v          : value to store; its bit pattern is written unchanged as
//                x, y, z at byteOffset + 0 / + 4 / + 8
void storeFloat3(uint byteOffset, float3 v) {
    // One 3-dword store instead of three scalar stores; the bytes written
    // are identical.
    blasPositions.Store3(byteOffset, asuint(v));
}
|
||
|
||
// ── Quad unpacking (original note: SAME as voxelVS.hlsl, plus chunkIndex
//    from the GPU mesh bits) ──
// Splits a packed quad into its fields.
//
//   raw.x  bits  5:0   -> px
//          bits 11:6   -> py
//          bits 17:12  -> pz
//          bits 23:18  -> w
//          bits 29:24  -> h
//          bits 31:30  -> face, low two bits
//   raw.y  bit   0     -> face, bit 2
//          bits 27:17  -> chunkIdx (GPU mesh path)
void unpackQuad(uint2 raw, out uint px, out uint py, out uint pz,
                out uint w, out uint h, out uint face, out uint chunkIdx)
{
    const uint kMask6  = 0x3Fu;    // 6-bit field
    const uint kMask11 = 0x7FFu;   // 11-bit field

    const uint word0 = raw.x;
    const uint word1 = raw.y;

    px = (word0      ) & kMask6;
    py = (word0 >>  6) & kMask6;
    pz = (word0 >> 12) & kMask6;
    w  = (word0 >> 18) & kMask6;
    h  = (word0 >> 24) & kMask6;

    // The 3-bit face id straddles the two words.
    const uint faceLowBits = (word0 >> 30) & 0x3u;
    const uint faceHighBit = (word1 & 0x1u) << 2;
    face = faceLowBits | faceHighBit;

    chunkIdx = (word1 >> 17) & kMask11;
}
|
||
|
||
// Entry point: one thread per quad, 64 threads per group.
// Each in-range thread unpacks quad DTid.x, computes its four corners
// (chunk worldPos + local position) and writes two triangles
// (6 float3 vertices = 72 bytes) to blasPositions at byte offset quadIdx * 72.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(64, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID) {
    const uint quadIdx = DTid.x;
    if (quadIdx >= push.quadCount) {
        return;   // thread lies past the last quad
    }

    uint px, py, pz, w, h, face, chunkIdx;
    unpackQuad(quadBuffer[quadIdx].data, px, py, pz, w, h, face, chunkIdx);

    GPUChunkInfo info = chunkInfoBuffer[chunkIdx];

    // NOTE(review): face is a 3-bit field, so 6 and 7 are representable while
    // the direction tables hold 6 entries — assumes the mesher only emits
    // 0..5; confirm.
    float3 normal = faceNormals[face];
    float3 uAxis  = faceU[face];
    float3 vAxis  = faceV[face];

    // ── Corner positions (original note: same math as voxelVS.hlsl) ──
    // Even face ids are the positive-direction faces; they sit one unit
    // further along their normal.
    float3 faceOffset = float3(0, 0, 0);
    if ((face & 1u) == 0u) {
        faceOffset = normal;
    }

    float3 localPos = float3((float)px, (float)py, (float)pz);
    float3 origin   = localPos + faceOffset + info.worldPos.xyz;

    float3 uStep = uAxis * (float)w;
    float3 vStep = vAxis * (float)h;

    float3 p00 = origin;
    float3 p10 = origin + uStep;
    float3 p01 = origin + vStep;
    float3 p11 = p10 + vStep;

    // ── Winding (original note: must match voxelVS.hlsl) ──
    // CCW for faces 1, 2, 5:  p00 p10 p01 | p01 p10 p11
    // CW  for faces 0, 3, 4:  p00 p01 p10 | p10 p01 p11
    // The two orders differ only by swapping p10 and p01.
    bool useCCW = (face == 1 || face == 2 || face == 5);

    float3 second = p10;
    float3 third  = p01;
    if (!useCCW) {
        second = p01;
        third  = p10;
    }

    const uint kVertexBytes = 12u;                  // one float3
    const uint kQuadBytes   = 6u * kVertexBytes;    // 72 bytes per quad
    const uint byteBase     = quadIdx * kQuadBytes;

    // Triangle 0
    storeFloat3(byteBase + 0u * kVertexBytes, p00);
    storeFloat3(byteBase + 1u * kVertexBytes, second);
    storeFloat3(byteBase + 2u * kVertexBytes, third);
    // Triangle 1
    storeFloat3(byteBase + 3u * kVertexBytes, third);
    storeFloat3(byteBase + 4u * kVertexBytes, second);
    storeFloat3(byteBase + 5u * kVertexBytes, p11);
}
|