// BVLE Voxels - GPU Smooth Mesher Pass 2: Emit with Smooth Normals
// Reads ONLY from centroid grid (written by pass 1). No voxel buffer access.
// This keeps the shader simple and fast to compile.
//
// Centroid grid format (float4 per cell, cells [-1..32]):
//   xyz = chunk-local position (valid for surface cells)
//   w   = packed flags: bit24=valid, bit25=solid, [7:0]=mat, [15:8]=secMat, [23:16]=blend
//
// Dispatch: 4x4x4 groups of 8x8x8 threads per chunk (cells [0..31])

#include "voxelCommon.hlsli"

struct SmoothPush {
    uint chunkIndex;
    uint voxelBufferOffset;   // unused in this shader
    uint maxOutputVerts;
    uint centroidGridOffset;
    uint pad[8];
};
[[vk::push_constant]] ConstantBuffer<SmoothPush> push : register(b999);

// NOTE(review): the element type of chunkInfo is missing in this copy of the file.
// It must be a struct exposing a `worldPos` member (presumably declared in
// voxelCommon.hlsli) - restore the template argument before building.
StructuredBuffer chunkInfo : register(t1);
StructuredBuffer<float4> centroidGrid : register(t2);

struct GPUSmoothVertex {
    float px, py, pz;
    float nx, ny, nz;
    uint packedMat;     // [7:0]=mat, [15:8]=secMat, [23:16]=blend (see emitVertex)
    uint packedChunk;   // low 16 bits of push.chunkIndex
};

RWStructuredBuffer<GPUSmoothVertex> outputVerts : register(u0);
RWByteAddressBuffer vertCounter : register(u1);

static const uint CSIZE = 32;                   // cells per chunk axis
static const uint GRID_DIM = CSIZE + 2;         // 34: chunk cells plus a one-cell border on each side
static const int  GRID_MIN_CELL = -1;           // lowest addressable cell coordinate
static const int  GRID_MAX_CELL = (int)CSIZE;   // highest addressable cell coordinate (32)

static const uint FLAG_VALID_BIT = 24;          // centroid.w bit: cell holds a valid centroid
static const uint FLAG_SOLID_BIT = 25;          // centroid.w bit: cell is solid

static const uint  VERTS_PER_QUAD = 6;          // two triangles, non-indexed
static const float LENGTH_EPSILON = 0.0001;     // below this a vector is treated as zero-length

// ── Grid access helpers ─────────────────────────────────────────────

// Linear index of `cellPos` inside this chunk's slice of centroidGrid.
// Cell coordinates are shifted by +1 so that cell -1 maps to grid slot 0.
// No range check: callers must pass coordinates in [GRID_MIN_CELL..GRID_MAX_CELL].
uint gridIndex(int3 cellPos)
{
    uint3 g = (uint3)(cellPos + 1);
    return push.centroidGridOffset + (g.z * GRID_DIM + g.y) * GRID_DIM + g.x;
}

// Packed flag/material word (centroid.w reinterpreted as uint) for `cellPos`.
// Returns 0 (not valid, not solid, material 0) for cells outside the grid.
uint readGridPacked(int3 cellPos)
{
    if (any(cellPos < GRID_MIN_CELL) || any(cellPos > GRID_MAX_CELL))
        return 0;
    return asuint(centroidGrid[gridIndex(cellPos)].w);
}

// True when the cell's "valid" flag is set. Out-of-grid cells are never valid.
bool isCentroidValid(int3 cellPos)
{
    return ((readGridPacked(cellPos) >> FLAG_VALID_BIT) & 1) != 0;
}

// True when the cell's "solid" flag is set. Out-of-grid cells are never solid.
bool isCellSolid(int3 cellPos)
{
    return ((readGridPacked(cellPos) >> FLAG_SOLID_BIT) & 1) != 0;
}

// Chunk-local centroid position of `cellPos`.
// Performs no range check; every call site first confirms isCentroidValid(cellPos),
// which is false for out-of-grid cells.
float3 readCentroidPos(int3 cellPos)
{
    return centroidGrid[gridIndex(cellPos)].xyz;
}

// ── Quad topology shared by normal computation and emission ────────

// The four cells whose centroids form the quad crossing the grid edge that
// starts at cell `e` and runs one cell along +edgeAxis (0 = X, 1 = Y, 2 = Z).
// With du/dv the two remaining axes in ascending order the result is
//   [0] = e - du - dv, [1] = e - dv, [2] = e - du, [3] = e
void edgeQuadCells(int3 e, int edgeAxis, out int3 quadCells[4])
{
    int3 du = int3(1, 0, 0);
    int3 dv = int3(0, 0, 1);
    if (edgeAxis == 0) du = int3(0, 1, 0);
    if (edgeAxis == 2) dv = int3(0, 1, 0);

    quadCells[0] = e - du - dv;
    quadCells[1] = e - dv;
    quadCells[2] = e - du;
    quadCells[3] = e;
}

// Flips `fn` if needed so that its component along the edge axis points from
// the solid side to the empty side: positive when the edge's start cell
// (`solid0`) is solid, non-positive otherwise.
float3 orientSolidToEmpty(float3 fn, bool solid0, int edgeAxis)
{
    float along = (edgeAxis == 0) ? fn.x : ((edgeAxis == 1) ? fn.y : fn.z);
    if ((along > 0.0) != solid0)
        fn = -fn;
    return fn;
}

// ── Face normal for one quad (4 sharing cells) ──────────────────────

// Un-normalized (area-weighted) face normal of the quad c0..c3, oriented
// solid→empty along `edgeAxis`. Returns (0,0,0) when any of the four cells
// has no valid centroid. Only c0, c1 and c3 contribute to the cross product.
float3 computeQuadFaceNormal(int3 c0, int3 c1, int3 c2, int3 c3, bool solid0, int edgeAxis)
{
    if (!isCentroidValid(c0) || !isCentroidValid(c1) ||
        !isCentroidValid(c2) || !isCentroidValid(c3))
        return float3(0, 0, 0);

    float3 p0 = readCentroidPos(c0);
    float3 p1 = readCentroidPos(c1);
    float3 p3 = readCentroidPos(c3);

    return orientSolidToEmpty(cross(p1 - p0, p3 - p0), solid0, edgeAxis);
}

// Adds the face normal of the quad on edge (e → e + edgeAxis) to `accum` and
// its length to `totalMag`. Does nothing when both edge endpoints have the
// same solidity, i.e. the surface does not cross this edge.
void accumEdgeQuad(int3 e, int edgeAxis, bool solid0, bool solid1,
                   inout float3 accum, inout float totalMag)
{
    if (solid0 == solid1)
        return;

    int3 q[4];
    edgeQuadCells(e, edgeAxis, q);

    float3 fn = computeQuadFaceNormal(q[0], q[1], q[2], q[3], solid0, edgeAxis);
    accum += fn;
    totalMag += length(fn);
}

// ── Smooth normal + consistency for a vertex at cell v ──────────────
// Checks all 12 incident edges (4 per axis), computes face normals from the
// centroid grid and sums them (area-weighted). Also returns a consistency metric:
//   consistency = |sum(fn)| / sum(|fn|)
//     = 1.0   when all face normals agree (flat surface)
//     ≈ 0.707 at a 90° edge (two perpendicular faces)
//     → 0     when faces cancel out
// Used at emission time to blend between smooth normal (interior) and
// face normal (edge vertices).
// Returns the normalized sum, or (0,1,0) when the sum is (near) zero.
// `consistency` is 1.0 when no face contributed.
float3 computeSmoothNormal(int3 v, out float consistency)
{
    const int3 X = int3(1, 0, 0);
    const int3 Y = int3(0, 1, 0);
    const int3 Z = int3(0, 0, 1);

    // Solidity of the 8 cells v + (x,y,z), x,y,z in {0,1}; named s<x><y><z>.
    // Each flag is shared by three edges, so read every one exactly once.
    bool s000 = isCellSolid(v);
    bool s100 = isCellSolid(v + X);
    bool s010 = isCellSolid(v + Y);
    bool s110 = isCellSolid(v + X + Y);
    bool s001 = isCellSolid(v + Z);
    bool s101 = isCellSolid(v + X + Z);
    bool s011 = isCellSolid(v + Y + Z);
    bool s111 = isCellSolid(v + X + Y + Z);

    float3 accum = float3(0, 0, 0);
    float totalMag = 0;

    // X-edges: start at (v.x, v.y+dy, v.z+dz) for dy,dz in {0,1}
    accumEdgeQuad(v,         0, s000, s100, accum, totalMag);
    accumEdgeQuad(v + Y,     0, s010, s110, accum, totalMag);
    accumEdgeQuad(v + Z,     0, s001, s101, accum, totalMag);
    accumEdgeQuad(v + Y + Z, 0, s011, s111, accum, totalMag);

    // Y-edges: start at (v.x+dx, v.y, v.z+dz) for dx,dz in {0,1}
    accumEdgeQuad(v,         1, s000, s010, accum, totalMag);
    accumEdgeQuad(v + X,     1, s100, s110, accum, totalMag);
    accumEdgeQuad(v + Z,     1, s001, s011, accum, totalMag);
    accumEdgeQuad(v + X + Z, 1, s101, s111, accum, totalMag);

    // Z-edges: start at (v.x+dx, v.y+dy, v.z) for dx,dy in {0,1}
    accumEdgeQuad(v,         2, s000, s001, accum, totalMag);
    accumEdgeQuad(v + X,     2, s100, s101, accum, totalMag);
    accumEdgeQuad(v + Y,     2, s010, s011, accum, totalMag);
    accumEdgeQuad(v + X + Y, 2, s110, s111, accum, totalMag);

    float accumLen = length(accum);

    // consistency: 1.0 = all faces agree, <1.0 = diverging face directions
    consistency = (totalMag > LENGTH_EPSILON) ? accumLen / totalMag : 1.0;

    if (accumLen > LENGTH_EPSILON)
        return accum / accumLen;
    return float3(0, 1, 0);
}

// ── Emit helpers ────────────────────────────────────────────────────

// Writes one vertex to outputVerts[slot]. Material ids and blend weight are
// truncated to 8 bits each, the chunk index to 16 bits.
void emitVertex(uint slot, float3 pos, float3 normal,
                uint primaryMat, uint secondaryMat, uint blendWeight)
{
    GPUSmoothVertex vert;
    vert.px = pos.x;
    vert.py = pos.y;
    vert.pz = pos.z;
    vert.nx = normal.x;
    vert.ny = normal.y;
    vert.nz = normal.z;
    vert.packedMat = (primaryMat & 0xFF)
                   | ((secondaryMat & 0xFF) << 8)
                   | ((blendWeight & 0xFF) << 16);
    vert.packedChunk = push.chunkIndex & 0xFFFF;
    outputVerts[slot] = vert;
}

// Reserves 6 consecutive vertex slots and writes the quad as two triangles.
//   windingA == true  : (0,1,3) (0,3,2)
//   windingA == false : (0,3,1) (0,2,3)
// The quad is dropped when the reservation does not fit below
// push.maxOutputVerts. The counter has already been advanced at that point,
// so the value in vertCounter can exceed maxOutputVerts; readers of the
// counter must clamp it.
void emitQuad(float3 p[4], float3 n[4], uint mat, uint secMat, uint blendW, bool windingA)
{
    uint slot;
    vertCounter.InterlockedAdd(0, VERTS_PER_QUAD, slot);

    // Written as a subtraction so a very large `slot` cannot wrap around
    // and slip past the capacity check.
    if (slot > push.maxOutputVerts || push.maxOutputVerts - slot < VERTS_PER_QUAD)
        return;

    if (windingA) {
        emitVertex(slot + 0, p[0], n[0], mat, secMat, blendW);
        emitVertex(slot + 1, p[1], n[1], mat, secMat, blendW);
        emitVertex(slot + 2, p[3], n[3], mat, secMat, blendW);
        emitVertex(slot + 3, p[0], n[0], mat, secMat, blendW);
        emitVertex(slot + 4, p[3], n[3], mat, secMat, blendW);
        emitVertex(slot + 5, p[2], n[2], mat, secMat, blendW);
    } else {
        emitVertex(slot + 0, p[0], n[0], mat, secMat, blendW);
        emitVertex(slot + 1, p[3], n[3], mat, secMat, blendW);
        emitVertex(slot + 2, p[1], n[1], mat, secMat, blendW);
        emitVertex(slot + 3, p[0], n[0], mat, secMat, blendW);
        emitVertex(slot + 4, p[2], n[2], mat, secMat, blendW);
        emitVertex(slot + 5, p[3], n[3], mat, secMat, blendW);
    }
}

// Emits the quad for the grid edge cellPos → cellPos + edgeAxis
// (0 = X, 1 = Y, 2 = Z) when the surface crosses it, i.e. when the two
// endpoint cells differ in solidity and all four surrounding cells carry a
// valid centroid. Material data is taken from `cellPos` itself.
void emitEdgeQuad(int3 cellPos, bool cellSolid, int edgeAxis, float3 chunkWorldPos)
{
    int3 step = int3(0, 0, 0);
    if (edgeAxis == 0)      step.x = 1;
    else if (edgeAxis == 1) step.y = 1;
    else                    step.z = 1;

    if (cellSolid == isCellSolid(cellPos + step))
        return;

    int3 cells[4];
    edgeQuadCells(cellPos, edgeAxis, cells);

    if (!isCentroidValid(cells[0]) || !isCentroidValid(cells[1]) ||
        !isCentroidValid(cells[2]) || !isCentroidValid(cells[3]))
        return;

    float3 p[4], n[4];
    float con[4];
    [loop] for (uint i = 0; i < 4; i++) {
        p[i] = chunkWorldPos + readCentroidPos(cells[i]);
        n[i] = computeSmoothNormal(cells[i], con[i]);
    }

    float3 fn = orientSolidToEmpty(cross(p[1] - p[0], p[3] - p[0]), cellSolid, edgeAxis);

    // Consistency-based blend: sharp edge vertices → face normal, curved → smooth
    //   consistency ≈ 1.0 = flat, ≈ 0.707 = 90° edge, < 0.5 = sharp corner
    //   smoothstep(0.70, 0.90): snaps to face normal at 90° boundaries (con<0.70)
    //   for seamless join with blocky, preserves smooth for terrain curves (con>0.90)
    // A degenerate quad (zero-length face normal) keeps the smooth normals as they are.
    float fnLen = length(fn);
    if (fnLen > LENGTH_EPSILON) {
        float3 fnN = fn / fnLen;
        [loop] for (uint k = 0; k < 4; k++) {
            float t = smoothstep(0.70, 0.90, con[k]);
            float3 blended = lerp(fnN, n[k], t);
            float blendedLen = length(blended);
            // Opposing face/smooth normals can cancel to ~zero; normalizing
            // that would produce NaN, so fall back to the face normal.
            if (blendedLen > LENGTH_EPSILON)
                n[k] = blended / blendedLen;
            else
                n[k] = fnN;
        }
    }

    // X and Z quads use winding A when the start cell is empty; Y quads use
    // the opposite choice (Y-axis winding flip).
    bool windingA = (edgeAxis == 1) ? cellSolid : !cellSolid;

    uint packed = readGridPacked(cells[3]);
    uint mat    = packed & 0xFF;
    uint secMat = (packed >> 8) & 0xFF;
    uint blendW = (packed >> 16) & 0xFF;

    emitQuad(p, n, mat, secMat, blendW, windingA);
}

// ── Main ────────────────────────────────────────────────────────────

// One thread per chunk cell [0..31]^3. Each thread tests the three grid edges
// leaving its cell in +X, +Y and +Z and emits a quad for every edge the
// surface crosses. Output positions are chunk-local centroids offset by the
// chunk's worldPos.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 8)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    if (any(DTid >= CSIZE))
        return;

    int3 cellPos = int3(DTid);
    bool cellSolid = isCellSolid(cellPos);
    float3 chunkWorldPos = chunkInfo[push.chunkIndex].worldPos.xyz;

    emitEdgeQuad(cellPos, cellSolid, 0, chunkWorldPos);   // X-edge: cellPos → cellPos + (1,0,0)
    emitEdgeQuad(cellPos, cellSolid, 1, chunkWorldPos);   // Y-edge: cellPos → cellPos + (0,1,0)
    emitEdgeQuad(cellPos, cellSolid, 2, chunkWorldPos);   // Z-edge: cellPos → cellPos + (0,0,1)
}