- Remove geoN (ddx/ddy) from the smooth PS entirely — use the smooth interpolated normal N for all triplanar sampling (albedo, heightmap, normal map). geoN changes discontinuously at triangle edges, causing per-triangle faceting in texture weights and normal perturbation.
- Tune the consistency-based vertex normal blend to smoothstep(0.70, 0.90): it snaps to the face normal at 90° boundaries (seamless join with blocky geometry) while preserving smooth normals on curved terrain.
- Unify all 3 edge axes (X/Y/Z) to the same smoothstep formula (previously a mix of smoothstep and pow4).
- Remove grass-specific hardcoded shading from both pixel shaders (side darkening, warm shift, ambient boost) — this will be data-driven per material later.
- Remove the CPU SmoothMesher code (GPU-only path).
- Document all findings in TROUBLESHOOTING.md, with a calibration table.
348 lines
15 KiB
HLSL
348 lines
15 KiB
HLSL
// BVLE Voxels - GPU Smooth Mesher Pass 2: Emit with Smooth Normals
// Reads ONLY from the centroid grid (written by pass 1). No voxel buffer access.
// This keeps the shader simple and fast to compile.
//
// Centroid grid format (float4 per cell, cells [-1..32] on each axis):
//   xyz = chunk-local position (valid for surface cells)
//   w   = packed flags: bit24=valid, bit25=solid, [7:0]=mat, [15:8]=secMat, [23:16]=blend
//
// Dispatch: 4x4x4 groups of 8x8x8 threads per chunk (cells [0..31])
|
|
|
|
#include "voxelCommon.hlsli"
|
|
|
|
// Push-constant payload for the smooth-mesher emit pass.
struct SmoothPush {
    // Chunk being meshed: indexes chunkInfo and is stamped into every emitted vertex.
    uint chunkIndex;
    // Unused in this shader.
    uint voxelBufferOffset;
    // Capacity limit compared against the reserved slot range before vertices are written.
    uint maxOutputVerts;
    // Element offset added to every centroidGrid lookup.
    uint centroidGridOffset;
    // Trailing padding words; not read by this shader.
    uint pad[8];
};
|
|
// Per-dispatch parameters (Vulkan push constants; bound at b999).
[[vk::push_constant]] ConstantBuffer<SmoothPush> push : register(b999);

// Read-only inputs.
// chunkInfo: per-chunk records; main reads worldPos for the chunk being meshed.
StructuredBuffer<GPUChunkInfo> chunkInfo : register(t1);
// centroidGrid: one float4 per grid cell (xyz = position, w = packed flag bits).
StructuredBuffer<float4> centroidGrid : register(t2);
|
|
|
|
// One emitted mesh vertex. Scalars (rather than float3) are used for the
// position and normal; the field order below is the buffer layout.
struct GPUSmoothVertex {
    // Position (main passes chunk world position + chunk-local centroid).
    float px;
    float py;
    float pz;
    // Shading normal.
    float nx;
    float ny;
    float nz;
    // [7:0] primary material, [15:8] secondary material, [23:16] blend weight.
    uint packedMat;
    // Low 16 bits of the chunk index.
    uint packedChunk;
};
|
|
// Outputs.
// outputVerts: triangle-list vertices, six per emitted quad.
RWStructuredBuffer<GPUSmoothVertex> outputVerts : register(u0);
// vertCounter: running vertex count at byte offset 0, advanced atomically by emitQuad.
RWByteAddressBuffer vertCounter : register(u1);

// Cells per chunk axis that this pass emits for ([0..31]).
static const uint CSIZE = 32;
// Centroid grid extent per axis: the chunk plus a one-cell apron on each side ([-1..32]).
static const uint GRID_DIM = CSIZE + 2;
|
|
|
|
// ── Grid access helpers ─────────────────────────────────────────────
|
|
// Flattens a cell coordinate into an element index of centroidGrid.
// Each axis is shifted by +1 so apron cell -1 lands in slot 0; x varies
// fastest, then y, then z. No range check is done here.
uint gridIndex(int3 cellPos) {
    const uint3 slot = (uint3)(cellPos + 1);
    const uint rowStride = GRID_DIM;
    const uint sliceStride = GRID_DIM * GRID_DIM;
    return push.centroidGridOffset + slot.z * sliceStride + slot.y * rowStride + slot.x;
}
|
|
|
|
// Returns the packed flag word (the w channel reinterpreted as uint) of a
// cell, or 0 when the cell lies outside the stored range [-1..32] on any
// axis. A zero word has neither the valid nor the solid bit set.
uint readGridPacked(int3 cellPos) {
    const int lowest = -1;
    const int highest = 32;
    const bool inside = all(cellPos >= lowest) && all(cellPos <= highest);
    if (!inside) {
        return 0;
    }
    const float4 cell = centroidGrid[gridIndex(cellPos)];
    return asuint(cell.w);
}
|
|
|
|
// True when the cell's packed word has bit 24 (valid centroid) set.
// Cells outside the grid report false.
bool isCentroidValid(int3 cellPos) {
    const uint VALID_BIT = 1u << 24;
    return (readGridPacked(cellPos) & VALID_BIT) != 0u;
}
|
|
|
|
// True when the cell's packed word has bit 25 (solid) set.
// Cells outside the grid report false.
bool isCellSolid(int3 cellPos) {
    const uint SOLID_BIT = 1u << 25;
    return (readGridPacked(cellPos) & SOLID_BIT) != 0u;
}
|
|
|
|
// Fetches the xyz position stored for a cell.
// No bounds or validity check is performed here; the callers in this file
// test isCentroidValid() on the cell first.
float3 readCentroidPos(int3 cellPos) {
    const float4 cell = centroidGrid[gridIndex(cellPos)];
    return cell.xyz;
}
|
|
|
|
// ── Face normal for one quad (4 sharing cells) ──────────────────────
|
|
// Face normal of the quad spanned by the centroids of four cells.
//   c0..c3   : the four cells sharing one grid edge (c2 is only validity-checked)
//   solid0   : solid flag of the cell the edge starts from
//   edgeAxis : 0/1/2 = X/Y/Z axis the edge runs along (any other value is treated as Z)
// Returns cross(c1 - c0, c3 - c0), flipped so that its edgeAxis component is
// positive when solid0 is true and non-positive otherwise. The result is NOT
// normalized, so its length acts as an area weight. Returns zero when any of
// the four centroids is not valid.
float3 computeQuadFaceNormal(int3 c0, int3 c1, int3 c2, int3 c3,
                             bool solid0, int edgeAxis) {
    const bool allValid = isCentroidValid(c0) && isCentroidValid(c1) &&
                          isCentroidValid(c2) && isCentroidValid(c3);
    if (!allValid) {
        return float3(0, 0, 0);
    }

    const float3 origin = readCentroidPos(c0);
    const float3 sideA = readCentroidPos(c1) - origin;
    const float3 sideB = readCentroidPos(c3) - origin;
    float3 faceN = cross(sideA, sideB);

    // Component of the normal along the edge axis decides the orientation.
    float alongEdge;
    if (edgeAxis == 0) {
        alongEdge = faceN.x;
    } else if (edgeAxis == 1) {
        alongEdge = faceN.y;
    } else {
        alongEdge = faceN.z;
    }

    // Orient solid -> empty: positive along the axis exactly when solid0 is set.
    const bool pointsPositive = alongEdge > 0.0;
    if (pointsPositive != solid0) {
        faceN = -faceN;
    }
    return faceN;
}
|
|
|
|
// ── Smooth normal + consistency for a vertex at cell v ──────────────
// The centroid of cell v is a corner of up to 12 quads: for each axis, the
// four grid edges starting at v, v+U, v+W and v+U+W (U, W = the two other
// axes). Every edge whose two end cells differ in solidity contributes the
// area-weighted face normal of its quad; the contributions are summed.
//
// Returns the normalized sum, or (0,1,0) when the sum is (near) zero.
// consistency (out) = |sum(fn)| / sum(|fn|):
//   = 1.0   when all face normals agree (flat surface)
//   ≈ 0.707 at a 90° edge (two perpendicular faces)
//   → 0     when faces cancel out
//   = 1.0   when no face contributed any area
// The emit step uses it to blend between this smooth normal and the quad's
// own face normal.
float3 computeSmoothNormal(int3 v, out float consistency) {
    // Per edge axis: direction of the edge, and the two in-plane directions.
    const int3 edgeDir[3] = { int3(1, 0, 0), int3(0, 1, 0), int3(0, 0, 1) };
    const int3 planeU[3]  = { int3(0, 1, 0), int3(1, 0, 0), int3(1, 0, 0) };
    const int3 planeW[3]  = { int3(0, 0, 1), int3(0, 0, 1), int3(0, 1, 0) };

    float3 normalSum = float3(0, 0, 0);
    float magnitudeSum = 0;

    [unroll] for (int axis = 0; axis < 3; axis++) {
        const int3 A = edgeDir[axis];
        const int3 U = planeU[axis];
        const int3 W = planeW[axis];

        // Edge start cells in the order v, v+U, v+W, v+U+W.
        [unroll] for (int corner = 0; corner < 4; corner++) {
            const int3 e = v + (corner & 1) * U + (corner >> 1) * W;
            const bool startSolid = isCellSolid(e);
            const bool endSolid = isCellSolid(e + A);
            if (startSolid != endSolid) {
                // Quad of the four cells sharing this edge, in the same
                // c0..c3 order the emit step uses.
                const float3 faceN = computeQuadFaceNormal(e - U - W, e - W, e - U, e,
                                                           startSolid, axis);
                normalSum += faceN;
                magnitudeSum += length(faceN);
            }
        }
    }

    const float sumLen = length(normalSum);

    // consistency: 1.0 = all faces agree, <1.0 = diverging face directions
    if (magnitudeSum > 0.0001) {
        consistency = sumLen / magnitudeSum;
    } else {
        consistency = 1.0;
    }

    if (sumLen > 0.0001) {
        return normalSum / sumLen;
    }
    return float3(0, 1, 0);
}
|
|
|
|
// ── Emit helpers ────────────────────────────────────────────────────
|
|
// Writes one vertex into outputVerts[slot].
// The three material inputs are each truncated to 8 bits and packed as
// [7:0] primary, [15:8] secondary, [23:16] blend weight; the chunk index
// from the push constants is truncated to 16 bits.
void emitVertex(uint slot, float3 pos, float3 normal, uint primaryMat, uint secondaryMat, uint blendWeight) {
    const uint primaryBits = primaryMat & 0xFF;
    const uint secondaryBits = (secondaryMat & 0xFF) << 8;
    const uint blendBits = (blendWeight & 0xFF) << 16;

    GPUSmoothVertex v;
    v.px = pos.x;
    v.py = pos.y;
    v.pz = pos.z;
    v.nx = normal.x;
    v.ny = normal.y;
    v.nz = normal.z;
    v.packedMat = primaryBits | secondaryBits | blendBits;
    v.packedChunk = push.chunkIndex & 0xFFFF;

    outputVerts[slot] = v;
}
|
|
|
|
// Emits one quad as two triangles (six vertices).
//   p, n     : corner positions and normals, indexed like the quad's cells
//   windingA : true  -> triangles (0,1,3) and (0,3,2)
//              false -> triangles (0,3,1) and (0,2,3)
// Six slots are reserved atomically from vertCounter; if they would run past
// push.maxOutputVerts nothing is written. The counter itself is not rolled
// back in that case, so it can end up above the number of vertices written.
// NOTE(review): presumably the consumer clamps the count to maxOutputVerts — confirm.
void emitQuad(float3 p[4], float3 n[4], uint mat, uint secMat, uint blendW, bool windingA) {
    uint base;
    vertCounter.InterlockedAdd(0, 6, base);
    if (base + 6 > push.maxOutputVerts) {
        return;
    }

    const uint cornersA[6] = { 0, 1, 3, 0, 3, 2 };
    const uint cornersB[6] = { 0, 3, 1, 0, 2, 3 };

    [unroll] for (uint k = 0; k < 6; k++) {
        uint corner;
        if (windingA) {
            corner = cornersA[k];
        } else {
            corner = cornersB[k];
        }
        emitVertex(base + k, p[corner], n[corner], mat, secMat, blendW);
    }
}
|
|
|
|
// ── Main ────────────────────────────────────────────────────────────
|
|
// Consistency thresholds for the per-vertex normal blend:
// at or below FACE the quad's face normal is used, at or above SMOOTH the
// smooth normal is used, with a smoothstep in between. A 90° edge has a
// consistency of about 0.707, so it sits at the face-normal end.
static const float NORMAL_BLEND_FACE = 0.70;
static const float NORMAL_BLEND_SMOOTH = 0.90;

// Emits the quad for the grid edge that starts at cellPos and runs along
// edgeAxis, if that edge crosses the surface.
//   cellPos       : cell the edge starts from
//   cellSolid     : solid flag of cellPos
//   chunkWorldPos : added to the chunk-local centroid positions
//   edgeAxis      : 0/1/2 = X/Y/Z
//   dirU, dirW    : unit steps along the two axes perpendicular to the edge;
//                   the quad's cells are cellPos-dirU-dirW, cellPos-dirW,
//                   cellPos-dirU and cellPos, in that order
//   flipWinding   : inverts the triangle winding choice for this axis
// Nothing is emitted when both end cells have the same solidity or when any
// of the four centroids is not valid.
void emitSmoothEdgeQuad(int3 cellPos, bool cellSolid, float3 chunkWorldPos,
                        int edgeAxis, int3 dirU, int3 dirW, bool flipWinding) {
    const int3 along = int3(edgeAxis == 0 ? 1 : 0,
                            edgeAxis == 1 ? 1 : 0,
                            edgeAxis == 2 ? 1 : 0);
    if (cellSolid == isCellSolid(cellPos + along)) {
        return; // no surface crossing on this edge
    }

    int3 cells[4];
    cells[0] = cellPos - dirU - dirW;
    cells[1] = cellPos - dirW;
    cells[2] = cellPos - dirU;
    cells[3] = cellPos;

    const bool allValid = isCentroidValid(cells[0]) && isCentroidValid(cells[1]) &&
                          isCentroidValid(cells[2]) && isCentroidValid(cells[3]);
    if (!allValid) {
        return;
    }

    float3 p[4], n[4];
    float con[4];
    [loop] for (uint i = 0; i < 4; i++)
        p[i] = chunkWorldPos + readCentroidPos(cells[i]);
    [loop] for (uint j = 0; j < 4; j++)
        n[j] = computeSmoothNormal(cells[j], con[j]);

    // Face normal of this quad, oriented solid -> empty along the edge axis.
    float3 fn = cross(p[1] - p[0], p[3] - p[0]);
    const float fnAxis = (edgeAxis == 0) ? fn.x : ((edgeAxis == 1) ? fn.y : fn.z);
    if ((fnAxis > 0.0) != cellSolid) {
        fn = -fn;
    }

    // Consistency-based blend: sharp-edge vertices -> face normal, curved -> smooth.
    // Skipped for degenerate quads, which keep the smooth normals as they are.
    const float fnLen = length(fn);
    if (fnLen > 0.0001) {
        const float3 fnN = fn / fnLen;
        [loop] for (uint k = 0; k < 4; k++) {
            const float t = smoothstep(NORMAL_BLEND_FACE, NORMAL_BLEND_SMOOTH, con[k]);
            const float3 blended = lerp(fnN, n[k], t);
            const float blendedLen = length(blended);
            // Guard: if the two normals cancel, normalizing would give NaN;
            // fall back to the face normal instead.
            if (blendedLen > 0.0001) {
                n[k] = blended / blendedLen;
            } else {
                n[k] = fnN;
            }
        }
    }

    // Winding follows the solidity of the start cell, inverted where requested.
    const bool windingA = (!cellSolid) != flipWinding;

    // Material comes from the edge's own cell (cells[3]).
    const uint packed = readGridPacked(cells[3]);
    const uint mat = packed & 0xFF;
    const uint secMat = (packed >> 8) & 0xFF;
    const uint blendW = (packed >> 16) & 0xFF;
    emitQuad(p, n, mat, secMat, blendW, windingA);
}

// Entry point: one thread per cell in [0..31]^3. Each thread checks the three
// grid edges leaving its cell in +X, +Y and +Z and emits a quad for every
// edge that crosses the surface.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 8)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    if (any(DTid >= CSIZE)) return;

    const int3 cellPos = int3(DTid);
    const bool cellSolid = isCellSolid(cellPos);
    const float3 chunkWorldPos = chunkInfo[push.chunkIndex].worldPos.xyz;

    // X-edge: cellPos -> cellPos + (1,0,0); quad lies in the YZ neighbourhood.
    emitSmoothEdgeQuad(cellPos, cellSolid, chunkWorldPos, 0, int3(0, 1, 0), int3(0, 0, 1), false);
    // Y-edge: cellPos -> cellPos + (0,1,0); this axis uses the flipped winding.
    emitSmoothEdgeQuad(cellPos, cellSolid, chunkWorldPos, 1, int3(1, 0, 0), int3(0, 0, 1), true);
    // Z-edge: cellPos -> cellPos + (0,0,1); quad lies in the XY neighbourhood.
    emitSmoothEdgeQuad(cellPos, cellSolid, chunkWorldPos, 2, int3(1, 0, 0), int3(0, 1, 0), false);
}
|