// BVLE Voxels - GPU Compute Mesher (Binary Face Culling only)
// 1 thread per voxel: checks 6 neighbors, emits 1x1 PackedQuad per visible face.
// No greedy merge — this is the simple GPU baseline.
// Phase 3: blend info is computed per-pixel in the PS (not pre-encoded here).

#include "voxelCommon.hlsli"

// Push constants: chunk index + output offset.
struct MeshPush {
    uint chunkIndex;         // which chunk to mesh
    uint voxelBufferOffset;  // offset into the voxel data buffer (in uint16 pairs, i.e. in uint elements)
    uint quadBufferOffset;   // offset into the output quad buffer (in quads)
    uint maxOutputQuads;     // safety cap on output
    // Pad to 48 bytes (12 x uint32). Declared as uint4[2] rather than uint[8]:
    // under D3D constant-buffer packing every array element starts a new
    // 16-byte register, so a scalar uint array would not occupy 32 bytes.
    // uint4 elements are tightly packed under both cbuffer and std430 rules.
    uint4 pad[2];
};
[[vk::push_constant]] ConstantBuffer<MeshPush> push : register(b999);

// Input: voxel data for one chunk (32^3 = 32768 voxels, packed as uint16 pairs in uint).
// Each uint holds 2 voxels: low 16 bits = voxel A, high 16 bits = voxel B.
StructuredBuffer<uint> voxelData : register(t0);

// Output: packed quads (append-style buffer driven by an atomic counter).
RWStructuredBuffer<uint2> outputQuads : register(u0);  // uint2 = 8 bytes = PackedQuad
RWByteAddressBuffer quadCounter : register(u1);        // atomic counter (uint at byte offset 0)

// Constants
static const uint CSIZE = 32;                     // chunk edge length in voxels
static const uint CVOL = CSIZE * CSIZE * CSIZE;   // 32768 voxels per chunk

// Read a single voxel (16-bit) from the packed buffer.
//   flatIndex: linear voxel index inside the chunk (x + y*CSIZE + z*CSIZE*CSIZE).
//   returns:   the 16-bit voxel value, zero-extended to uint.
uint readVoxel(uint flatIndex) {
    uint pairIndex = flatIndex >> 1;        // which uint (2 voxels per uint)
    uint shift = (flatIndex & 1) * 16;      // 0 for the low half, 16 for the high half
    return (voxelData[push.voxelBufferOffset + pairIndex] >> shift) & 0xFFFF;
}

// Check if neighbor is air (handles out-of-bounds as air for chunk boundaries)
// Smooth voxels are treated as solid — the smooth Surface Nets mesh covers the boundary.
//   pos: voxel coordinate inside the chunk; dir: per-axis offset to the neighbor.
//   Returns true when the neighbor cell is empty, i.e. the face toward dir is visible.
bool isNeighborAir(int3 pos, int3 dir) {
    const int3 neighbor = pos + dir;

    // A neighbor outside this chunk counts as air, so boundary faces are always emitted.
    // This guard must stay ahead of the buffer read below.
    if (any(neighbor < 0) || any(neighbor >= (int3)CSIZE)) {
        return true;
    }

    const uint neighborIndex =
        (uint)neighbor.x + (uint)neighbor.y * CSIZE + (uint)neighbor.z * CSIZE * CSIZE;

    // Zero means air; any non-zero voxel (blocky or smooth) hides the face.
    return readVoxel(neighborIndex) == 0;
}

// Pack a quad into uint2 (matches CPU PackedQuad format).
//   lo: x [5:0], y [11:6], z [17:12], w [23:18], h [29:24], low two face bits [31:30]
//   hi: high face bit [0], matID from bit 1 upward, chunkIdx (11 bits) in [27:17] for VS lookup
uint2 packQuad(uint x, uint y, uint z, uint w, uint h, uint face, uint matID, uint chunkIdx) {
    uint2 packed;

    // Face bits above the low two fall off the top of the 32-bit word here;
    // the remaining face bit is carried in hi.
    packed.x = x | (y << 6) | (z << 12) | (w << 18) | (h << 24) | (face << 30);
    packed.y = (face >> 2) | (matID << 1) | ((chunkIdx & 0x7FF) << 17);

    return packed;
}

// Face directions, indexed by face id: +X, -X, +Y, -Y, +Z, -Z.
static const int3 faceDirs[6] = {
    int3( 1,  0,  0),
    int3(-1,  0,  0),
    int3( 0,  1,  0),
    int3( 0, -1,  0),
    int3( 0,  0,  1),
    int3( 0,  0, -1)
};

// Entry point: one thread per voxel. For every face whose neighbor is air,
// reserves one output slot through the atomic counter and writes a 1x1 quad.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 8)]  // 512 threads per group; 64 groups cover 32^3
void main(uint3 DTid : SV_DispatchThreadID) {
    // Threads outside the chunk volume have nothing to do.
    if (any(DTid >= CSIZE)) {
        return;
    }

    const uint cellIndex = DTid.x + DTid.y * CSIZE + DTid.z * CSIZE * CSIZE;
    const uint cellValue = readVoxel(cellIndex);

    // Air voxel: nothing to emit.
    if (cellValue == 0) {
        return;
    }

    // Phase 5: smooth voxels are skipped (they have their own Surface Nets mesh).
    // VoxelData layout: [15:8] matID, [7:4] flags, [3:0] metadata.
    // FLAG_SMOOTH = 0x1 within the flags nibble, i.e. bit 4 of the packed value.
    const uint SMOOTH_BIT = 1u << 4;
    if ((cellValue & SMOOTH_BIT) != 0) {
        return;
    }

    const uint material = cellValue >> 8;  // high 8 bits = material ID
    const int3 cell = (int3)DTid;

    [unroll]
    for (uint face = 0; face < 6; ++face) {
        if (isNeighborAir(cell, faceDirs[face])) {
            // Reserve one output slot; reservedSlot receives the counter value before the add.
            uint reservedSlot;
            quadCounter.InterlockedAdd(0, 1, reservedSlot);

            // Overflow guard: stop this thread once the cap is reached.
            // NOTE(review): the counter has already been incremented at this point, so its
            // final value can exceed maxOutputQuads — presumably consumers clamp it; confirm.
            if (reservedSlot >= push.maxOutputQuads) {
                return;
            }

            outputQuads[push.quadBufferOffset + reservedSlot] =
                packQuad(DTid.x, DTid.y, DTid.z, 1, 1, face, material, push.chunkIndex);
        }
    }
}