// bvle-voxels/shaders/voxelSmoothCS.hlsl

// BVLE Voxels - GPU Smooth Mesher Pass 2: Emit with Smooth Normals
// Reads ONLY from centroid grid (written by pass 1). No voxel buffer access.
// This keeps the shader simple and fast to compile.
//
// Centroid grid format (float4 per cell, cells [-1..32]):
//   xyz = chunk-local position (valid for surface cells)
//   w   = packed flags: bit24=valid, bit25=solid, [7:0]=mat, [15:8]=secMat, [23:16]=blend
//
// Dispatch: 4x4x4 groups of 8x8x8 threads per chunk (cells [0..31])

#include "voxelCommon.hlsli"

// Push constants for the emit pass. Only chunkIndex, maxOutputVerts and
// centroidGridOffset are read by this shader.
struct SmoothPush {
    uint chunkIndex;          // index into chunkInfo; its low 16 bits are also stamped into every emitted vertex
    uint voxelBufferOffset;   // unused in this shader
    uint maxOutputVerts;      // vertex budget: emitQuad drops any quad whose 6 slots would end past this
    uint centroidGridOffset;  // element offset of this chunk's first grid cell inside centroidGrid
    uint pad[8];              // never read here; presumably pads the block to a fixed size shared with the host — TODO confirm
};
[[vk::push_constant]] ConstantBuffer<SmoothPush> push : register(b999);

// Read-only inputs.
// chunkInfo:    per-chunk records; this pass only reads worldPos.xyz of push.chunkIndex.
// centroidGrid: one float4 per grid cell (format described in the file header),
//               addressed through gridIndex().
StructuredBuffer<GPUChunkInfo> chunkInfo : register(t1);
StructuredBuffer<float4> centroidGrid : register(t2);

// One emitted vertex. Quads are written as two triangles (6 consecutive vertices).
struct GPUSmoothVertex {
    float px, py, pz;   // position: chunk worldPos + chunk-local centroid
    float nx, ny, nz;   // shading normal (smooth normal blended towards the quad's face normal)
    uint packedMat;     // [7:0]=primary material, [15:8]=secondary material, [23:16]=blend weight
    uint packedChunk;   // push.chunkIndex & 0xFFFF
};
// outputVerts: destination vertex stream, indexed by the slot handed out by vertCounter.
// vertCounter: uint at byte offset 0, atomically advanced by 6 for every quad attempt.
RWStructuredBuffer<GPUSmoothVertex> outputVerts : register(u0);
RWByteAddressBuffer vertCounter : register(u1);

// CSIZE:    cells processed per axis; threads with any DTid component >= CSIZE exit immediately.
// GRID_DIM: grid slots per axis (cells -1..32), used as the row/slice stride in gridIndex().
static const uint CSIZE = 32;
static const uint GRID_DIM = 34;

// ── Grid access helpers ─────────────────────────────────────────────
// Linear element index of a cell inside centroidGrid (x fastest, then y, then z).
// Cell coordinates are shifted by +1 so that cell -1 lands on slot 0.
// No range check is done here: callers must pass components in [-1, 32].
uint gridIndex(int3 cellPos) {
    uint3 slot = (uint3)(cellPos + 1);
    uint linear = (slot.z * GRID_DIM + slot.y) * GRID_DIM + slot.x;
    return push.centroidGridOffset + linear;
}

// Returns the packed flag/material word (the .w lane reinterpreted as uint) of a cell.
// Cells outside [-1, 32] on any axis read as 0, i.e. not valid and not solid.
uint readGridPacked(int3 cellPos) {
    const int firstCell = -1;
    const int lastCell = (int)GRID_DIM - 2;   // 32: last slot index minus the +1 shift

    bool insideGrid = all(cellPos >= firstCell) && all(cellPos <= lastCell);
    if (!insideGrid) return 0;

    float4 cell = centroidGrid[gridIndex(cellPos)];
    return asuint(cell.w);
}

// True when bit 24 (valid centroid) is set for the cell; false for out-of-range cells.
bool isCentroidValid(int3 cellPos) {
    uint flags = readGridPacked(cellPos);
    return (flags & (1u << 24)) != 0u;
}

// True when bit 25 (solid) is set for the cell; out-of-range cells count as empty.
bool isCellSolid(int3 cellPos) {
    uint flags = readGridPacked(cellPos);
    return (flags & (1u << 25)) != 0u;
}

// Chunk-local centroid position stored in the cell's xyz lanes.
// Unchecked: call only for cells that already passed isCentroidValid(),
// which also guarantees the cell is inside the grid.
float3 readCentroidPos(int3 cellPos) {
    float4 cell = centroidGrid[gridIndex(cellPos)];
    return cell.xyz;
}

// ── Face normal for one quad (4 sharing cells) ──────────────────────
// c0..c3:   the four cells whose centroids form the quad. All four must hold a
//           valid centroid, otherwise the quad does not exist and (0,0,0) is returned.
//           Only c0, c1 and c3 contribute to the cross product; c2 is validity-checked only.
// solid0:   solidity of the cell at the start of the crossed grid edge.
// edgeAxis: axis of that grid edge (0=x, 1=y, 2=z).
// Returns the un-normalized normal (length scales with quad area), flipped so
// its edgeAxis component points from the solid cell towards the empty one.
float3 computeQuadFaceNormal(int3 c0, int3 c1, int3 c2, int3 c3,
                              bool solid0, int edgeAxis) {
    bool quadExists = isCentroidValid(c0) && isCentroidValid(c1) &&
                      isCentroidValid(c2) && isCentroidValid(c3);
    if (!quadExists)
        return float3(0, 0, 0);

    float3 origin = readCentroidPos(c0);
    float3 sideA = readCentroidPos(c1) - origin;
    float3 sideB = readCentroidPos(c3) - origin;
    float3 faceN = cross(sideA, sideB);

    // Component of the normal along the crossed edge
    float along;
    if (edgeAxis == 0)      along = faceN.x;
    else if (edgeAxis == 1) along = faceN.y;
    else                    along = faceN.z;

    // solid0 == true means the surface must face +axis, otherwise -axis
    bool facesPositive = along > 0.0;
    if (facesPositive != solid0) faceN = -faceN;

    return faceN;
}

// ── Smooth normal + consistency for a vertex at cell v ──────────────
// Adds the face normal of the quad crossing one grid edge to the running sums.
//   e:            cell at the start of the edge (edge runs e → e + unit(edgeAxis))
//   stepU/stepV:  unit steps along the two axes perpendicular to the edge
//   solidAtEdge:  solidity of e
//   solidNext:    solidity of e + unit(edgeAxis)
// Nothing is added when both cells have the same solidity (no surface crossing).
// The quad's cells are e-U-V, e-V, e-U, e — the same layout main() uses to emit it.
void accumulateEdgeQuad(int3 e, int3 stepU, int3 stepV,
                        bool solidAtEdge, bool solidNext, int edgeAxis,
                        inout float3 accum, inout float totalMag) {
    if (solidAtEdge == solidNext) return;

    float3 fn = computeQuadFaceNormal(e - stepU - stepV, e - stepV, e - stepU, e,
                                      solidAtEdge, edgeAxis);
    accum += fn;
    totalMag += length(fn);
}

// Checks all 12 grid edges whose quads touch the centroid of cell v (4 per axis),
// computes their face normals from the centroid grid and sums them (area-weighted).
// Also returns a consistency metric:
//   consistency = |sum(fn)| / sum(|fn|)
//   = 1.0 when all face normals agree (flat surface)
//   ≈ 0.707 at a 90° edge (two perpendicular faces)
//   → 0 when faces cancel out
// Used at emission time to blend between smooth normal (interior) and
// face normal (edge vertices).
// Returns the normalized sum, or (0,1,0) when the sum is (near) zero; in the
// no-quad case consistency is reported as 1.0.
float3 computeSmoothNormal(int3 v, out float consistency) {
    const int3 X = int3(1, 0, 0);
    const int3 Y = int3(0, 1, 0);
    const int3 Z = int3(0, 0, 1);

    // Solidity of the 2x2x2 block v .. v+(1,1,1), fetched once and shared by all
    // three axis passes. Suffix digits are the x, y, z offsets from v.
    bool s000 = isCellSolid(v);
    bool s100 = isCellSolid(v + X);
    bool s010 = isCellSolid(v + Y);
    bool s110 = isCellSolid(v + X + Y);
    bool s001 = isCellSolid(v + Z);
    bool s101 = isCellSolid(v + X + Z);
    bool s011 = isCellSolid(v + Y + Z);
    bool s111 = isCellSolid(v + X + Y + Z);

    float3 accum = float3(0, 0, 0);
    float totalMag = 0;

    // X-edges: start at (v.x, v.y+dy, v.z+dz) for dy,dz in {0,1}
    accumulateEdgeQuad(v,         Y, Z, s000, s100, 0, accum, totalMag);
    accumulateEdgeQuad(v + Y,     Y, Z, s010, s110, 0, accum, totalMag);
    accumulateEdgeQuad(v + Z,     Y, Z, s001, s101, 0, accum, totalMag);
    accumulateEdgeQuad(v + Y + Z, Y, Z, s011, s111, 0, accum, totalMag);

    // Y-edges: start at (v.x+dx, v.y, v.z+dz) for dx,dz in {0,1}
    accumulateEdgeQuad(v,         X, Z, s000, s010, 1, accum, totalMag);
    accumulateEdgeQuad(v + X,     X, Z, s100, s110, 1, accum, totalMag);
    accumulateEdgeQuad(v + Z,     X, Z, s001, s011, 1, accum, totalMag);
    accumulateEdgeQuad(v + X + Z, X, Z, s101, s111, 1, accum, totalMag);

    // Z-edges: start at (v.x+dx, v.y+dy, v.z) for dx,dy in {0,1}
    accumulateEdgeQuad(v,         X, Y, s000, s001, 2, accum, totalMag);
    accumulateEdgeQuad(v + X,     X, Y, s100, s101, 2, accum, totalMag);
    accumulateEdgeQuad(v + Y,     X, Y, s010, s011, 2, accum, totalMag);
    accumulateEdgeQuad(v + X + Y, X, Y, s110, s111, 2, accum, totalMag);

    float accumLen = length(accum);
    // consistency: 1.0 = all faces agree, <1.0 = diverging face directions
    consistency = (totalMag > 0.0001) ? accumLen / totalMag : 1.0;
    return (accumLen > 0.0001) ? accum / accumLen : float3(0, 1, 0);
}

// ── Emit helpers ────────────────────────────────────────────────────
// Writes one vertex into outputVerts[slot].
// Materials and blend weight are truncated to 8 bits each and packed as
// [7:0]=primary, [15:8]=secondary, [23:16]=blend; the chunk index keeps its low 16 bits.
// The caller is responsible for slot being inside the buffer.
void emitVertex(uint slot, float3 pos, float3 normal, uint primaryMat, uint secondaryMat, uint blendWeight) {
    uint matBits = primaryMat & 0xFF;
    matBits |= (secondaryMat & 0xFF) << 8;
    matBits |= (blendWeight & 0xFF) << 16;

    GPUSmoothVertex v;
    v.px = pos.x;
    v.py = pos.y;
    v.pz = pos.z;
    v.nx = normal.x;
    v.ny = normal.y;
    v.nz = normal.z;
    v.packedMat = matBits;
    v.packedChunk = push.chunkIndex & 0xFFFF;

    outputVerts[slot] = v;
}

// Appends one quad as two triangles (6 vertices) to outputVerts.
//   p, n:     corner positions and normals, indexed like the quad's 4 cells
//   windingA: true  → triangles (0,1,3) and (0,3,2)
//             false → triangles (0,3,1) and (0,2,3)  (opposite facing)
// Six slots are reserved atomically first; if they would end past
// push.maxOutputVerts the quad is dropped. The counter is NOT rolled back in
// that case, so its final value can exceed the budget.
void emitQuad(float3 p[4], float3 n[4], uint mat, uint secMat, uint blendW, bool windingA) {
    uint base;
    vertCounter.InterlockedAdd(0, 6, base);
    if (base + 6 > push.maxOutputVerts) return;

    // The reverse winding is the same vertex set with the last two vertices of
    // each triangle exchanged, so only the destination slots differ.
    uint flip = windingA ? 0u : 1u;

    // Triangle 1
    emitVertex(base + 0,        p[0], n[0], mat, secMat, blendW);
    emitVertex(base + 1 + flip, p[1], n[1], mat, secMat, blendW);
    emitVertex(base + 2 - flip, p[3], n[3], mat, secMat, blendW);

    // Triangle 2
    emitVertex(base + 3,        p[0], n[0], mat, secMat, blendW);
    emitVertex(base + 4 + flip, p[3], n[3], mat, secMat, blendW);
    emitVertex(base + 5 - flip, p[2], n[2], mat, secMat, blendW);
}

// ── Per-edge quad emission ──────────────────────────────────────────
// Handles the grid edge cellPos → cellPos + unit(axis) (axis: 0=x, 1=y, 2=z).
// If the two cells differ in solidity and the four cells around the edge all
// hold a valid centroid, one quad is emitted through emitQuad().
//
// Per-vertex normals: each corner's smooth normal is blended towards the quad's
// face normal according to that corner's consistency:
//   consistency ≈ 1.0 = flat, ≈ 0.707 = 90° edge, < 0.5 = sharp corner
//   smoothstep(0.70, 0.90): snaps to the face normal at 90° boundaries (con<0.70)
//   for a seamless join with blocky geometry, keeps the smooth normal on
//   terrain curves (con>0.90).
// Material, secondary material and blend weight come from cellPos itself.
void emitEdgeQuad(int3 cellPos, bool cellSolid, float3 chunkWorldPos, int axis) {
    int3 axisStep = int3((axis == 0) ? 1 : 0, (axis == 1) ? 1 : 0, (axis == 2) ? 1 : 0);
    if (cellSolid == isCellSolid(cellPos + axisStep)) return;   // no surface crossing

    // Unit steps along the two axes perpendicular to the edge, lowest axis first
    int3 stepU = int3(1, 0, 0);
    int3 stepV = int3(0, 0, 1);
    if (axis == 0) stepU = int3(0, 1, 0);
    if (axis == 2) stepV = int3(0, 1, 0);

    int3 cells[4] = {
        cellPos - stepU - stepV,
        cellPos - stepV,
        cellPos - stepU,
        cellPos
    };
    if (!isCentroidValid(cells[0]) || !isCentroidValid(cells[1]) ||
        !isCentroidValid(cells[2]) || !isCentroidValid(cells[3]))
        return;

    float3 p[4], n[4];
    float con[4];
    [loop] for (uint i = 0; i < 4; i++) {
        p[i] = chunkWorldPos + readCentroidPos(cells[i]);
        n[i] = computeSmoothNormal(cells[i], con[i]);
    }

    // Face normal from chunk-local centroids: subtracting world-offset positions
    // would lose low-order bits when chunkWorldPos is large.
    float3 origin = readCentroidPos(cells[0]);
    float3 fn = cross(readCentroidPos(cells[1]) - origin,
                      readCentroidPos(cells[3]) - origin);

    // Orient solid → empty along the edge axis
    float fnAxis = (axis == 0) ? fn.x : ((axis == 1) ? fn.y : fn.z);
    if ((fnAxis > 0.0) != cellSolid) fn = -fn;

    // For the Y axis the in-plane pair (x, z) has the opposite handedness
    // (x × z = -y), so the winding choice is inverted there.
    bool windingA = !cellSolid;
    if (axis == 1) windingA = !windingA;

    float fnLen = length(fn);
    if (fnLen > 0.0001) {
        float3 fnN = fn / fnLen;
        [loop] for (uint k = 0; k < 4; k++) {
            float t = smoothstep(0.70, 0.90, con[k]);
            float3 blended = lerp(fnN, n[k], t);
            float blendedLen = length(blended);
            // Guard the degenerate blend (opposing normals) instead of emitting NaN
            n[k] = (blendedLen > 0.0001) ? blended / blendedLen : fnN;
        }
    }

    uint packed = readGridPacked(cells[3]);
    uint mat = packed & 0xFF;
    uint secMat = (packed >> 8) & 0xFF;
    uint blendW = (packed >> 16) & 0xFF;
    emitQuad(p, n, mat, secMat, blendW, windingA);
}

// ── Main ────────────────────────────────────────────────────────────
// One thread per cell in [0..31]^3. Each thread owns the three grid edges that
// leave its cell in +x, +y and +z and emits at most one quad per edge.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 8)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    if (any(DTid >= CSIZE)) return;
    int3 cellPos = int3(DTid);

    bool cellSolid = isCellSolid(cellPos);
    float3 chunkWorldPos = chunkInfo[push.chunkIndex].worldPos.xyz;

    emitEdgeQuad(cellPos, cellSolid, chunkWorldPos, 0);   // X-edge
    emitEdgeQuad(cellPos, cellSolid, chunkWorldPos, 1);   // Y-edge
    emitEdgeQuad(cellPos, cellSolid, chunkWorldPos, 2);   // Z-edge
}