bvle-voxels/shaders/voxelMeshCS.hlsl

89 lines
3.4 KiB
HLSL
Raw Normal View History

// BVLE Voxels - GPU Compute Mesher (Binary Face Culling only)
// 1 thread per voxel: checks 6 neighbors, emits 1x1 PackedQuad per visible face.
// No greedy merge — this is the simple GPU baseline.
// Phase 3: blend info is computed per-pixel in the PS (not pre-encoded here).
#include "voxelCommon.hlsli"
// Push constants: per-dispatch parameters (chunk index, buffer offsets, output cap).
// NOTE(review): the host-side structure must match this layout field-for-field — confirm against the CPU code.
struct MeshPush {
uint chunkIndex; // which chunk to mesh; stored in every emitted quad (packQuad keeps only the low 11 bits)
uint voxelBufferOffset; // offset into the voxel data buffer, in uint elements (each uint = one pair of uint16 voxels)
uint quadBufferOffset; // offset into the output quad buffer (in quads); added to the slot returned by the counter
uint maxOutputQuads; // safety cap on output: a quad whose counter slot is >= this value is not written
uint pad[8]; // pad to 48 bytes (12 x uint32)
// NOTE(review): the 48-byte figure assumes tightly packed uints. D3D legacy constant-buffer
// packing starts each array element on a 16-byte boundary — confirm the layout on the
// [RootSignature] path if the total size matters there.
};
[[vk::push_constant]] ConstantBuffer<MeshPush> push : register(b999);
// Input: voxel data for one chunk (32^3 = 32768 voxels, packed as uint16 pairs in uint)
// Each uint holds 2 voxels: low 16 bits = voxel A (even flat index), high 16 bits = voxel B (odd flat index)
StructuredBuffer<uint> voxelData : register(t0);
// Output: packed quads, written at the slots handed out by the atomic counter below
RWStructuredBuffer<uint2> outputQuads : register(u0); // uint2 = 8 bytes = PackedQuad
RWByteAddressBuffer quadCounter : register(u1); // atomic counter (the shader uses the uint at byte offset 0)
// Constants
static const uint CSIZE = 32; // chunk edge length, in voxels
static const uint CVOL = CSIZE * CSIZE * CSIZE; // 32768 voxels per chunk
// Fetch one 16-bit voxel from the packed voxel buffer.
// Two voxels share each uint: even flat indices live in the low half,
// odd flat indices in the high half. The lookup is relative to
// push.voxelBufferOffset (counted in uint elements).
uint readVoxel(uint flatIndex) {
    const uint packedPair = voxelData[push.voxelBufferOffset + (flatIndex / 2)];
    const bool inUpperHalf = (flatIndex % 2) != 0;
    const uint shifted = inUpperHalf ? (packedPair >> 16) : packedPair;
    return shifted & 0xFFFF;
}
// Report whether the voxel adjacent to `pos` in direction `dir` is air.
// A neighbor that falls outside the chunk is reported as air, so faces on
// the chunk boundary are always considered visible.
bool isNeighborAir(int3 pos, int3 dir) {
    const int3 neighbor = pos + dir;

    // Inside the chunk means every component lies in [0, CSIZE).
    const bool insideChunk = all(neighbor >= 0) && all(neighbor < (int3)CSIZE);
    if (!insideChunk) {
        return true;
    }

    // Flatten x + y*CSIZE + z*CSIZE^2 and test the stored value (0 = air).
    const uint3 cell = (uint3)neighbor;
    const uint flatNeighbor = cell.x + CSIZE * (cell.y + CSIZE * cell.z);
    return readVoxel(flatNeighbor) == 0;
}
// Encode one quad as a uint2 (the original comment states this matches the CPU PackedQuad format).
//   low word : x at bit 0, y at bit 6, z at bit 12, w at bit 18, h at bit 24,
//              low two bits of face at bits 30-31
//   high word: remaining face bits starting at bit 0, matID starting at bit 1,
//              low 11 bits of chunkIdx in bits [27:17] (used by the VS for chunk lookup)
// Only chunkIdx is masked; every other field must already fit its bit range.
uint2 packQuad(uint x, uint y, uint z, uint w, uint h, uint face, uint matID, uint chunkIdx) {
    uint lowWord = x;
    lowWord |= y << 6;
    lowWord |= z << 12;
    lowWord |= w << 18;
    lowWord |= h << 24;
    lowWord |= face << 30;

    uint highWord = face >> 2;
    highWord |= matID << 1;
    highWord |= (chunkIdx & 0x7FF) << 17;

    return uint2(lowWord, highWord);
}
// Face directions, indexed by face ID: 0 = +X, 1 = -X, 2 = +Y, 3 = -Y, 4 = +Z, 5 = -Z.
// The array index is the value main() passes to packQuad as `face`.
static const int3 faceDirs[6] = {
int3( 1, 0, 0), int3(-1, 0, 0),
int3( 0, 1, 0), int3( 0,-1, 0),
int3( 0, 0, 1), int3( 0, 0,-1)
};
// Entry point: one thread per voxel of a 32^3 chunk.
// For a non-air voxel, each of the six neighbors is tested; every face whose
// neighbor is air reserves one slot from quadCounter and writes a 1x1 quad at
// push.quadBufferOffset + slot.
// NOTE(review): the counter is incremented before the cap check, so its final
// value can exceed push.maxOutputQuads — presumably whoever reads the counter
// clamps it; confirm against the consumer.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 8)] // 512 threads per group; 64 groups cover 32^3
void main(uint3 DTid : SV_DispatchThreadID)
{
    // Threads outside the chunk have nothing to do.
    if (DTid.x >= CSIZE || DTid.y >= CSIZE || DTid.z >= CSIZE) {
        return;
    }

    const uint selfIndex = DTid.x + CSIZE * (DTid.y + CSIZE * DTid.z);
    const uint selfVoxel = readVoxel(selfIndex);
    if (selfVoxel == 0) {
        return; // air emits no faces
    }

    const uint material = selfVoxel >> 8; // high 8 bits = material ID
    const int3 cell = (int3)DTid;

    [unroll]
    for (uint face = 0; face < 6; ++face) {
        if (isNeighborAir(cell, faceDirs[face])) {
            // Reserve one output slot for this visible face.
            uint reservedSlot;
            quadCounter.InterlockedAdd(0, 1, reservedSlot);

            // Past the cap: stop this thread entirely, since later slots can only be larger.
            if (reservedSlot >= push.maxOutputQuads) {
                return;
            }

            outputQuads[push.quadBufferOffset + reservedSlot] =
                packQuad(DTid.x, DTid.y, DTid.z, 1, 1, face, material, push.chunkIndex);
        }
    }
}