bvle-voxels/shaders/voxelVS.hlsl

174 lines
6 KiB
HLSL
Raw Normal View History

// BVLE Voxels - Vertex Shader (Vertex Pulling from mega-buffer)
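// Phase 2: supports the CPU draw loop (explicit push constants), GPU MDI
// (chunk + face index packed into push.chunkIndex) and the GPU mesh path
// (chunk index embedded in each packed quad).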
#include "voxelCommon.hlsli"
// One 64-bit quad record as stored in quadBuffer.
// Bit layout as decoded by unpackQuad() and main():
//   data.x [5:0]   px        data.x [11:6]  py       data.x [17:12] pz
//   data.x [23:18] w         data.x [29:24] h
//   data.x [31:30] face bits 0-1, data.y [0] face bit 2
//   data.y [8:1]   material ID
//   data.y [16:9]  ambient occlusion (4 corners x 2 bits)
//   data.y [27:17] chunk index (only read on the GPU mesh path)
struct PackedQuad {
uint2 data; // 8 bytes = 2 x uint32 (x = low word, y = high word)
};
// Packed quads for all chunks; main() indexes it with a global quad index.
StructuredBuffer<PackedQuad> quadBuffer : register(t0);
// Per-chunk metadata, indexed by chunk index. The shader reads quadOffset, quadCount and
// worldPos from it. GPUChunkInfo is not declared in this file (presumably voxelCommon.hlsli).
StructuredBuffer<GPUChunkInfo> chunkInfoBuffer : register(t2);
// Push constants (48 bytes = 12 x uint32)
// main() selects one of three paths from `flags` (bit 1 is tested before bit 0):
//   flags & 2 : GPU mesh path - quadOffset is the base quad index; the chunk index is
//               read from each quad's high word, chunkIndex is ignored
//   flags & 1 : MDI path - chunkIndex packs chunk index (low 16 bits) and face index
//               (high 16 bits); the quad base comes from GPUChunkInfo, quadOffset is ignored
//   otherwise : CPU path - chunkIndex + quadOffset explicit
struct VoxelPush {
uint chunkIndex; // CPU path: chunk index. MDI path: low 16 bits = chunk index, high 16 bits = face index
uint quadOffset; // offset into mega quad buffer (in quads); used by the CPU and GPU mesh paths
uint flags; // bit 0: MDI mode, bit 1: GPU mesh mode, neither set = CPU mode
uint pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8; // pads the block to 12 x uint32
};
[[vk::push_constant]] ConstantBuffer<VoxelPush> push : register(b999);
// Per-vertex outputs written by main().
struct VSOutput {
float4 position : SV_POSITION; // worldPos transformed by viewProjection
float3 worldPos : WORLDPOS; // chunk-local position + GPUChunkInfo.worldPos.xyz
float3 normal : NORMAL; // axis-aligned face normal taken from faceNormals[]
float2 uv : TEXCOORD0; // corner * (w, h) * textureTiling
nointerpolation uint materialID : MATERIALID; // 8-bit material ID unpacked from the quad
nointerpolation uint faceID : FACEID; // face index unpacked from the quad
nointerpolation float debugFlag : DEBUGFLAG; // copy of GPUChunkInfo.worldPos.w
float ao : AO; // 1.0 - (2-bit AO / 3) * 0.4, i.e. in [0.6, 1.0]
};
// Decode the fields of one 64-bit packed quad (raw.x = low word, raw.y = high word).
//   px, py, pz : 6-bit fields from low-word bits [5:0], [11:6], [17:12]
//   w, h       : 6-bit fields from low-word bits [23:18], [29:24]
//   face       : 3 bits; bits 0-1 from low-word [31:30], bit 2 from high-word [0]
//   matID      : 8 bits from high-word [8:1]
//   ao         : 8 bits from high-word [16:9]
// High-word bits above 16 are not decoded here.
void unpackQuad(uint2 raw, out uint px, out uint py, out uint pz,
                out uint w, out uint h, out uint face,
                out uint matID, out uint ao)
{
    const uint kMask6 = 0x3Fu;
    const uint kMask8 = 0xFFu;

    // Low word: position and extent, each a 6-bit field.
    const uint3 position = (raw.xxx >> uint3(0u, 6u, 12u)) & kMask6;
    const uint2 extent = (raw.xx >> uint2(18u, 24u)) & kMask6;
    px = position.x;
    py = position.y;
    pz = position.z;
    w = extent.x;
    h = extent.y;

    // The face index straddles the word boundary: the top two bits of the low
    // word are its bits 0-1, bit 0 of the high word is its bit 2.
    face = (raw.x >> 30u) | ((raw.y & 1u) << 2u);

    // High word: material ID and AO, each an 8-bit field.
    const uint2 attributes = (raw.yy >> uint2(1u, 9u)) & kMask8;
    matID = attributes.x;
    ao = attributes.y;
}
// Binary search over chunkInfoBuffer[0 .. chunkCount) for the chunk that owns a
// global quad index: returns the first index whose quad range ends after it
// (quadOffset + quadCount > globalQuadIndex).
// Relies on chunks being packed contiguously in the mega-buffer, sorted by chunk index.
// O(log2(chunkCount)) = ~11 iterations for 2048 chunks.
// Returns chunkCount when globalQuadIndex lies past the last chunk's range, so callers
// must range-check the result before indexing chunkInfoBuffer with it.
// Not called by main() in this file.
uint findChunkIndex(uint globalQuadIndex) {
    uint first = 0;
    uint last = chunkCount; // exclusive upper bound
    [loop]
    while (first < last) {
        const uint probe = (first + last) >> 1;
        const GPUChunkInfo candidate = chunkInfoBuffer[probe];
        const uint rangeEnd = candidate.quadOffset + candidate.quadCount;
        if (globalQuadIndex < rangeEnd) {
            // The probe's range ends after the target: the answer is probe or earlier.
            last = probe;
        } else {
            // The probe's range ends at or before the target: the answer is later.
            first = probe + 1;
        }
    }
    return first;
}
// Unit normal for each face index.
static const float3 faceNormals[6] = {
    float3( 1,  0,  0), // 0: +X
    float3(-1,  0,  0), // 1: -X
    float3( 0,  1,  0), // 2: +Y
    float3( 0, -1,  0), // 3: -Y
    float3( 0,  0,  1), // 4: +Z
    float3( 0,  0, -1)  // 5: -Z
};
// In-plane axes used to expand a quad, indexed by face (+X, -X, +Y, -Y, +Z, -Z).
// The quad's width runs along faceU and its height along faceV.
static const float3 faceU[6] = {
    float3(0, 1, 0), // +X: U = Y
    float3(0, 1, 0), // -X: U = Y
    float3(1, 0, 0), // +Y: U = X
    float3(1, 0, 0), // -Y: U = X
    float3(1, 0, 0), // +Z: U = X
    float3(1, 0, 0)  // -Z: U = X
};
static const float3 faceV[6] = {
    float3(0, 0, 1), // +X: V = Z
    float3(0, 0, 1), // -X: V = Z
    float3(0, 0, 1), // +Y: V = Z
    float3(0, 0, 1), // -Y: V = Z
    float3(0, 1, 0), // +Z: V = Y
    float3(0, 1, 0)  // -Z: V = Y
};
// Vertex shader entry point (vertex pulling).
// Every 6 consecutive vertex IDs form one quad (2 triangles): vertexID / 6 selects the
// quad and vertexID % 6 selects the triangle vertex. The quad is fetched from quadBuffer,
// unpacked, expanded along the face's U/V axes and offset by the owning chunk's position.
//
// The quad and chunk are located according to push.flags:
//   flags & 2 : GPU mesh path - quad = push.quadOffset + vertexID / 6, chunk index is
//               read from bits [27:17] of the quad's high word
//   flags & 1 : MDI path - chunk and face index are packed into push.chunkIndex
//   otherwise : CPU path - push.chunkIndex and push.quadOffset are used directly
[RootSignature(VOXEL_ROOTSIG)]
VSOutput main(uint vertexID : SV_VertexID)
{
    VSOutput output;

    const bool gpuMeshPath = (push.flags & 2) != 0;
    const bool mdiPath = (push.flags & 1) != 0;

    const uint localQuad = vertexID / 6;
    const uint cornerIndex = vertexID % 6;

    // Determine quad index and chunk index based on rendering mode
    uint quadIndex;
    uint chunkIndex = 0;
    if (gpuMeshPath) {
        // GPU mesh path: quads are in a flat buffer, the chunk index is embedded in
        // each quad (11 bits, [27:17] of the hi word) and is extracted after the fetch.
        // push.quadOffset = base offset into the GPU quad buffer.
        quadIndex = push.quadOffset + localQuad;
    } else if (mdiPath) {
        // MDI path: push.chunkIndex is packed by ExecuteIndirect command signature:
        //   low 16 bits  = chunk index into chunkInfoBuffer
        //   high 16 bits = face index (0-5)
        // SV_VertexID starts at 0 (startVertexLocation=0), so we compute the
        // global quad index from the GPUChunkInfo face offset.
        chunkIndex = push.chunkIndex & 0xFFFF;
        uint faceIdx = push.chunkIndex >> 16;
        GPUChunkInfo ci = chunkInfoBuffer[chunkIndex];
        uint faceOff = getFaceOffset(ci, faceIdx);
        quadIndex = ci.quadOffset + faceOff + localQuad;
    } else {
        // CPU path: push constants provide explicit offsets
        quadIndex = push.quadOffset + localQuad;
        chunkIndex = push.chunkIndex;
    }

    PackedQuad packed = quadBuffer[quadIndex];
    uint px, py, pz, w, h, face, matID, ao;
    unpackQuad(packed.data, px, py, pz, w, h, face, matID, ao);

    // The face field is 3 bits wide, so 6 and 7 are representable but have no entry
    // in the 6-element face tables. Clamp so a corrupt quad cannot index out of bounds.
    face = min(face, 5u);

    if (gpuMeshPath) {
        // 11-bit chunk index stored in bits [27:17] of the hi word.
        chunkIndex = (packed.data.y >> 17) & 0x7FF;
    }
    GPUChunkInfo info = chunkInfoBuffer[chunkIndex];

    // Corner offsets for 2 triangles (6 vertices per quad)
    // cross(U,V) matches N for faces:  +X(0), -Y(3), +Z(4) -> CW corners
    // cross(U,V) opposes N for faces:  -X(1), +Y(2), -Z(5) -> CCW corners
    static const float2 cornersCW[6] = {
        float2(0, 0), float2(0, 1), float2(1, 0),
        float2(1, 0), float2(0, 1), float2(1, 1)
    };
    static const float2 cornersCCW[6] = {
        float2(0, 0), float2(1, 0), float2(0, 1),
        float2(0, 1), float2(1, 0), float2(1, 1)
    };
    // Triangle vertex -> AO slot. In both corner tables vertex 3 repeats the position
    // of vertex 2 and vertex 4 repeats vertex 1, so they must read the same AO slot;
    // otherwise the shared diagonal gets two different AO values.
    static const uint aoSlotOfVertex[6] = { 0u, 1u, 2u, 2u, 1u, 3u };

    bool useCCW = (face == 1 || face == 2 || face == 5);
    float2 corner = useCCW ? cornersCCW[cornerIndex] : cornersCW[cornerIndex];

    float3 basePos = float3((float)px, (float)py, (float)pz);
    float3 normal = faceNormals[face];
    float3 uAxis = faceU[face];
    float3 vAxis = faceV[face];

    // Positive faces: offset by 1 in normal direction
    float3 faceOffset = (face % 2 == 0) ? normal : float3(0, 0, 0);

    // Expand quad
    float3 localPos = basePos + faceOffset + uAxis * corner.x * (float)w + vAxis * corner.y * (float)h;
    float3 worldPos = localPos + info.worldPos.xyz;

    output.position = mul(viewProjection, float4(worldPos, 1.0));
    output.worldPos = worldPos;
    output.normal = normal;
    output.uv = corner * float2((float)w, (float)h) * textureTiling;
    output.materialID = matID;
    output.faceID = face;
    output.debugFlag = info.worldPos.w;

    // AO: 4 corners x 2 bits, slot i stored in bits [2i+1 : 2i].
    // NOTE(review): slots 1 and 2 follow triangle vertex order, so they refer to
    // swapped physical corners on CW vs CCW faces - confirm this matches the mesher.
    uint aoSlot = aoSlotOfVertex[cornerIndex];
    float aoValue = (float)((ao >> (aoSlot * 2u)) & 3u) / 3.0;
    // Darken by at most 40% at full occlusion.
    output.ao = 1.0 - aoValue * 0.4;
    return output;
}