// BVLE Voxels - GPU Smooth Mesher Pass 2: Emit with Smooth Normals
// Reads ONLY from centroid grid (written by pass 1). No voxel buffer access.
// This keeps the shader simple and fast to compile.
//
// Centroid grid format (float4 per cell, cells [-1..32]):
//   xyz = chunk-local position (valid for surface cells)
//   w   = packed flags: bit24=valid, bit25=solid, [7:0]=mat, [15:8]=secMat, [23:16]=blend
//
// Dispatch: 4x4x4 groups of 8x8x8 threads per chunk (cells [0..31])

#include "voxelCommon.hlsli"

// Push constants for this pass.
struct SmoothPush {
    uint chunkIndex;          // index into chunkInfo; low 16 bits are also stored per vertex
    uint voxelBufferOffset;   // unused in this shader
    uint maxOutputVerts;      // quads whose 6 vertices would not fit below this are dropped
    uint centroidGridOffset;  // element offset of this chunk's grid inside centroidGrid
    uint pad[8];
};

// NOTE(review): the resource declarations below carry no element-type template
// arguments in this view (usage suggests SmoothPush for `push`, float4 for
// `centroidGrid`, GPUSmoothVertex for `outputVerts`; the `chunkInfo` element
// type comes from the include and is not visible here). They are kept exactly
// as found - confirm against the real file.
[[vk::push_constant]] ConstantBuffer push : register(b999);
StructuredBuffer chunkInfo : register(t1);
StructuredBuffer centroidGrid : register(t2);

// One emitted vertex: position, normal, packed materials and chunk id.
struct GPUSmoothVertex {
    float px, py, pz;
    float nx, ny, nz;
    uint packedMat;    // [7:0]=primary, [15:8]=secondary, [23:16]=blend weight
    uint packedChunk;  // push.chunkIndex & 0xFFFF
};

RWStructuredBuffer outputVerts : register(u0);
RWByteAddressBuffer vertCounter : register(u1);

static const uint CSIZE = 32;
static const uint GRID_DIM = 34;

// ── Grid access helpers ─────────────────────────────────────────────

// Linear index of a cell in the padded GRID_DIM^3 grid. Cell coordinates are
// shifted by +1 so that cell -1 maps to slot 0. Performs no bounds check.
uint gridIndex(int3 cellPos)
{
    return push.centroidGridOffset
         + (uint)(cellPos.z + 1) * GRID_DIM * GRID_DIM
         + (uint)(cellPos.y + 1) * GRID_DIM
         + (uint)(cellPos.x + 1);
}

// Returns the packed flag word (the .w channel reinterpreted as uint) of a
// cell, or 0 (not valid, not solid) for cells outside [-1..CSIZE].
uint readGridPacked(int3 cellPos)
{
    if (any(cellPos < -1) || any(cellPos > (int)CSIZE))
        return 0;
    return asuint(centroidGrid[gridIndex(cellPos)].w);
}

// True when bit 24 (valid centroid) is set. Out-of-range cells are never valid.
bool isCentroidValid(int3 cellPos)
{
    return ((readGridPacked(cellPos) >> 24) & 1) != 0;
}

// True when bit 25 (solid) is set. Out-of-range cells are never solid.
bool isCellSolid(int3 cellPos)
{
    return ((readGridPacked(cellPos) >> 25) & 1) != 0;
}

// Chunk-local centroid position of a cell. Not bounds-checked: every caller
// in this file tests isCentroidValid() on the cell first.
float3 readCentroidPos(int3 cellPos)
{
    return centroidGrid[gridIndex(cellPos)].xyz;
}

// ── Face normal for one quad (4 sharing cells) ──────────────────────

// Unnormalized face normal of the quad formed by the centroids of c0..c3.
//   solid0   - solidity of the cell at the start of the crossed edge
//   edgeAxis - axis of the crossed edge (0 = x, 1 = y, 2 = z)
// Returns zero if any of the four centroids is invalid. Only the triangle
// (c0, c1, c3) is used for the cross product; c2 is checked for validity but
// its position is not read. The result is flipped so that its edgeAxis
// component points from the solid side towards the empty side.
float3 computeQuadFaceNormal(int3 c0, int3 c1, int3 c2, int3 c3,
                             bool solid0, int edgeAxis)
{
    if (!isCentroidValid(c0) || !isCentroidValid(c1) ||
        !isCentroidValid(c2) || !isCentroidValid(c3))
        return float3(0, 0, 0);

    float3 p0 = readCentroidPos(c0);
    float3 p1 = readCentroidPos(c1);
    float3 p3 = readCentroidPos(c3);

    float3 fn = cross(p1 - p0, p3 - p0);

    // Orient: solid→empty direction. A solid start cell means the normal
    // must have a positive component along the edge axis.
    float fnAxis = (edgeAxis == 0) ? fn.x : ((edgeAxis == 1) ? fn.y : fn.z);
    if ((fnAxis > 0.0) != solid0)
        fn = -fn;

    return fn; // area-weighted (not normalized)
}

// ── Smooth normal for a vertex at cell v ────────────────────────────

// Checks all 12 incident edges (4 per axis), computes face normals from the
// centroid grid and averages them. All reads come from the grid only.
// Returns a unit vector, or (0, 1, 0) when the accumulated normal is shorter
// than 0.0001.
float3 computeSmoothNormal(int3 v)
{
    // Solidity of the 8 cells v + {0,1}^3, named s<dx><dy><dz>. Every one of
    // the 12 edge tests below compares two of these, so read each cell once.
    bool s000 = isCellSolid(v);
    bool s100 = isCellSolid(v + int3(1, 0, 0));
    bool s010 = isCellSolid(v + int3(0, 1, 0));
    bool s110 = isCellSolid(v + int3(1, 1, 0));
    bool s001 = isCellSolid(v + int3(0, 0, 1));
    bool s101 = isCellSolid(v + int3(1, 0, 1));
    bool s011 = isCellSolid(v + int3(0, 1, 1));
    bool s111 = isCellSolid(v + int3(1, 1, 1));

    float3 accum = float3(0, 0, 0);

    // X-edges: starting at (v.x, v.y+dy, v.z+dz) for dy,dz in {0,1}
    if (s000 != s100) {
        accum += computeQuadFaceNormal(
            v + int3(0, -1, -1), v + int3(0, 0, -1),
            v + int3(0, -1, 0),  v,
            s000, 0);
    }
    if (s010 != s110) {
        accum += computeQuadFaceNormal(
            v + int3(0, 0, -1), v + int3(0, 1, -1),
            v,                  v + int3(0, 1, 0),
            s010, 0);
    }
    if (s001 != s101) {
        accum += computeQuadFaceNormal(
            v + int3(0, -1, 0), v,
            v + int3(0, -1, 1), v + int3(0, 0, 1),
            s001, 0);
    }
    if (s011 != s111) {
        accum += computeQuadFaceNormal(
            v,                 v + int3(0, 1, 0),
            v + int3(0, 0, 1), v + int3(0, 1, 1),
            s011, 0);
    }

    // Y-edges: starting at (v.x+dx, v.y, v.z+dz) for dx,dz in {0,1}
    if (s000 != s010) {
        accum += computeQuadFaceNormal(
            v + int3(-1, 0, -1), v + int3(0, 0, -1),
            v + int3(-1, 0, 0),  v,
            s000, 1);
    }
    if (s100 != s110) {
        accum += computeQuadFaceNormal(
            v + int3(0, 0, -1), v + int3(1, 0, -1),
            v,                  v + int3(1, 0, 0),
            s100, 1);
    }
    if (s001 != s011) {
        accum += computeQuadFaceNormal(
            v + int3(-1, 0, 0), v,
            v + int3(-1, 0, 1), v + int3(0, 0, 1),
            s001, 1);
    }
    if (s101 != s111) {
        accum += computeQuadFaceNormal(
            v,                 v + int3(1, 0, 0),
            v + int3(0, 0, 1), v + int3(1, 0, 1),
            s101, 1);
    }

    // Z-edges: starting at (v.x+dx, v.y+dy, v.z) for dx,dy in {0,1}
    if (s000 != s001) {
        accum += computeQuadFaceNormal(
            v + int3(-1, -1, 0), v + int3(0, -1, 0),
            v + int3(-1, 0, 0),  v,
            s000, 2);
    }
    if (s100 != s101) {
        accum += computeQuadFaceNormal(
            v + int3(0, -1, 0), v + int3(1, -1, 0),
            v,                  v + int3(1, 0, 0),
            s100, 2);
    }
    if (s010 != s011) {
        accum += computeQuadFaceNormal(
            v + int3(-1, 0, 0), v,
            v + int3(-1, 1, 0), v + int3(0, 1, 0),
            s010, 2);
    }
    if (s110 != s111) {
        accum += computeQuadFaceNormal(
            v,                 v + int3(1, 0, 0),
            v + int3(0, 1, 0), v + int3(1, 1, 0),
            s110, 2);
    }

    float len = length(accum);
    return (len > 0.0001) ? accum / len : float3(0, 1, 0);
}

// ── Emit helpers ────────────────────────────────────────────────────

// Writes one vertex to outputVerts[slot]. Each material field is masked to
// 8 bits before packing; the chunk index is masked to 16 bits.
void emitVertex(uint slot, float3 pos, float3 normal,
                uint primaryMat, uint secondaryMat, uint blendWeight)
{
    GPUSmoothVertex vert;
    vert.px = pos.x;
    vert.py = pos.y;
    vert.pz = pos.z;
    vert.nx = normal.x;
    vert.ny = normal.y;
    vert.nz = normal.z;
    vert.packedMat = (primaryMat & 0xFF)
                   | ((secondaryMat & 0xFF) << 8)
                   | ((blendWeight & 0xFF) << 16);
    vert.packedChunk = push.chunkIndex & 0xFFFF;
    outputVerts[slot] = vert;
}

// Emits a quad as two triangles (6 vertices).
// Six consecutive slots are reserved with an atomic add on vertCounter at
// byte offset 0. If they do not fit below push.maxOutputVerts the quad is
// dropped; the counter has already been advanced at that point, so it can end
// up larger than the number of vertices actually written.
//   windingA == true  : triangles (0,1,3) and (0,3,2)
//   windingA == false : triangles (0,3,1) and (0,2,3)
void emitQuad(float3 p[4], float3 n[4], uint mat, uint secMat, uint blendW,
              bool windingA)
{
    uint slot;
    vertCounter.InterlockedAdd(0, 6, slot);
    if (slot + 6 > push.maxOutputVerts)
        return;

    if (windingA) {
        emitVertex(slot + 0, p[0], n[0], mat, secMat, blendW);
        emitVertex(slot + 1, p[1], n[1], mat, secMat, blendW);
        emitVertex(slot + 2, p[3], n[3], mat, secMat, blendW);
        emitVertex(slot + 3, p[0], n[0], mat, secMat, blendW);
        emitVertex(slot + 4, p[3], n[3], mat, secMat, blendW);
        emitVertex(slot + 5, p[2], n[2], mat, secMat, blendW);
    } else {
        emitVertex(slot + 0, p[0], n[0], mat, secMat, blendW);
        emitVertex(slot + 1, p[3], n[3], mat, secMat, blendW);
        emitVertex(slot + 2, p[1], n[1], mat, secMat, blendW);
        emitVertex(slot + 3, p[0], n[0], mat, secMat, blendW);
        emitVertex(slot + 4, p[2], n[2], mat, secMat, blendW);
        emitVertex(slot + 5, p[3], n[3], mat, secMat, blendW);
    }
}

// Emits the quad shared by the four cells around one sign-changing edge.
// Nothing is emitted unless all four centroids are valid. Positions are moved
// to world space by adding chunkWorldPos; normals come from
// computeSmoothNormal(); the material fields are taken from cells[3].
void emitEdgeQuad(int3 cells[4], float3 chunkWorldPos, bool windingA)
{
    if (!isCentroidValid(cells[0]) || !isCentroidValid(cells[1]) ||
        !isCentroidValid(cells[2]) || !isCentroidValid(cells[3]))
        return;

    float3 p[4], n[4];
    [loop]
    for (uint i = 0; i < 4; i++) {
        p[i] = chunkWorldPos + readCentroidPos(cells[i]);
        n[i] = computeSmoothNormal(cells[i]);
    }

    uint packed = readGridPacked(cells[3]);
    uint mat    = packed & 0xFF;
    uint secMat = (packed >> 8) & 0xFF;
    uint blendW = (packed >> 16) & 0xFF;

    emitQuad(p, n, mat, secMat, blendW, windingA);
}

// ── Main ────────────────────────────────────────────────────────────

// One thread per cell in [0..CSIZE-1]^3. For each of the cell's three +axis
// edges whose end cells differ in solidity, emits the quad joining the
// centroids of the four cells that share that edge.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 8)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    if (any(DTid >= CSIZE))
        return;

    int3 cellPos = int3(DTid);
    bool cellSolid = isCellSolid(cellPos);
    float3 chunkWorldPos = chunkInfo[push.chunkIndex].worldPos.xyz;

    // ── X-edge: cellPos → cellPos + (1,0,0) ────────────────────────
    if (cellSolid != isCellSolid(cellPos + int3(1, 0, 0))) {
        int3 cells[4] = {
            cellPos + int3(0, -1, -1),
            cellPos + int3(0,  0, -1),
            cellPos + int3(0, -1,  0),
            cellPos
        };
        emitEdgeQuad(cells, chunkWorldPos, !cellSolid);
    }

    // ── Y-edge: cellPos → cellPos + (0,1,0) ────────────────────────
    if (cellSolid != isCellSolid(cellPos + int3(0, 1, 0))) {
        int3 cells[4] = {
            cellPos + int3(-1, 0, -1),
            cellPos + int3( 0, 0, -1),
            cellPos + int3(-1, 0,  0),
            cellPos
        };
        // Y-axis winding flip: the (x, z) ordering of these cells has the
        // opposite handedness to the (y, z) and (x, y) orderings used for
        // the X and Z edges, so the flag is inverted here.
        emitEdgeQuad(cells, chunkWorldPos, cellSolid);
    }

    // ── Z-edge: cellPos → cellPos + (0,0,1) ────────────────────────
    if (cellSolid != isCellSolid(cellPos + int3(0, 0, 1))) {
        int3 cells[4] = {
            cellPos + int3(-1, -1, 0),
            cellPos + int3( 0, -1, 0),
            cellPos + int3(-1,  0, 0),
            cellPos
        };
        emitEdgeQuad(cells, chunkWorldPos, !cellSolid);
    }
}