bvle-voxels/shaders/voxelSmoothCS.hlsl

336 lines
13 KiB
HLSL
Raw Normal View History

// BVLE Voxels - GPU Smooth Mesher Pass 2: Emit with Smooth Normals
// Reads ONLY from centroid grid (written by pass 1). No voxel buffer access.
// This keeps the shader simple and fast to compile.
//
// Centroid grid format (float4 per cell, cells [-1..32]):
// xyz = chunk-local position (valid for surface cells)
// w = packed flags: bit24=valid, bit25=solid, [7:0]=mat, [15:8]=secMat, [23:16]=blend
//
// Dispatch: 4x4x4 groups of 8x8x8 threads per chunk (cells [0..31])
#include "voxelCommon.hlsli"
// Push-constant block for the emit pass. The code in this file reads only
// chunkIndex, maxOutputVerts and centroidGridOffset.
struct SmoothPush {
// Index into chunkInfo; its low 16 bits are also stored in every emitted
// vertex (GPUSmoothVertex::packedChunk).
uint chunkIndex;
uint voxelBufferOffset; // unused in this shader
// Capacity limit: emitQuad drops a quad when its six slots would end past this.
uint maxOutputVerts;
// Element offset (in float4 entries) of this chunk's grid inside centroidGrid.
uint centroidGridOffset;
// Not read anywhere in this file; presumably keeps the block at a fixed size.
// NOTE(review): under cbuffer-style packing, array elements may be padded to
// 16 bytes each — confirm the CPU-side struct matches the compiled layout.
uint pad[8];
};
// Declared with the vk::push_constant attribute and bound to register b999.
[[vk::push_constant]] ConstantBuffer<SmoothPush> push : register(b999);
// Per-chunk metadata. This shader reads only worldPos of the entry selected by
// push.chunkIndex (GPUChunkInfo itself is declared outside this file).
StructuredBuffer<GPUChunkInfo> chunkInfo : register(t1);
// Centroid grid produced by pass 1: xyz = chunk-local position,
// w = packed flags reinterpreted as uint (see the format note at the top).
StructuredBuffer<float4> centroidGrid : register(t2);
// One output vertex as written by emitVertex.
struct GPUSmoothVertex {
// Position: chunk world position + chunk-local centroid (computed in main).
float px, py, pz;
// Smooth normal returned by computeSmoothNormal (unit length, or (0,1,0) fallback).
float nx, ny, nz;
// [7:0] primary material, [15:8] secondary material, [23:16] blend weight.
uint packedMat;
// push.chunkIndex & 0xFFFF.
uint packedChunk;
};
// Vertex stream; emitQuad writes six consecutive entries per quad.
RWStructuredBuffer<GPUSmoothVertex> outputVerts : register(u0);
// The uint at byte offset 0 is the running vertex count; emitQuad atomically adds 6 per quad.
RWByteAddressBuffer vertCounter : register(u1);
// Cells per chunk axis; main discards threads whose ID is >= CSIZE on any axis.
static const uint CSIZE = 32;
// Grid cells per axis used by gridIndex: cells -1..32, i.e. a one-cell border on each side.
static const uint GRID_DIM = 34;
// ── Grid access helpers ─────────────────────────────────────────────
// Maps a chunk-local cell coordinate (each component expected in [-1..32]) to
// its element index inside centroidGrid. The +1 bias moves the border cell -1
// to grid slot 0; x is the fastest-varying axis, z the slowest.
// No range check is done here — callers must pass an in-range cell.
uint gridIndex(int3 cellPos) {
    const uint3 slot = uint3(cellPos + 1);
    const uint linear = (slot.z * GRID_DIM + slot.y) * GRID_DIM + slot.x;
    return push.centroidGridOffset + linear;
}
// Returns the packed flag word (the w component, bit-cast to uint) of a cell.
// Cells outside the stored range [-1..32] on any axis yield 0, which decodes
// as "not valid, not solid, material 0".
uint readGridPacked(int3 cellPos) {
    const bool insideGrid = all(cellPos >= -1) && all(cellPos <= 32);
    if (!insideGrid) {
        return 0;
    }
    const float4 entry = centroidGrid[gridIndex(cellPos)];
    return asuint(entry.w);
}
// True when bit 24 (the "valid" flag) of the cell's packed word is set.
// Out-of-range cells report false because readGridPacked returns 0 for them.
bool isCentroidValid(int3 cellPos) {
    const uint flags = readGridPacked(cellPos);
    return (flags & (1u << 24)) != 0;
}
// True when bit 25 (the "solid" flag) of the cell's packed word is set.
// Out-of-range cells report false because readGridPacked returns 0 for them.
bool isCellSolid(int3 cellPos) {
    const uint flags = readGridPacked(cellPos);
    return (flags & (1u << 25)) != 0;
}
// Fetches the chunk-local centroid position (xyz) stored for a cell.
// Performs no range or validity check: the callers in this file only invoke
// it after isCentroidValid has succeeded for the same cell.
float3 readCentroidPos(int3 cellPos) {
    const float4 entry = centroidGrid[gridIndex(cellPos)];
    return entry.xyz;
}
// ── Face normal for one quad (4 sharing cells) ──────────────────────
// Area-weighted (unnormalized) face normal of the quad whose corners are the
// centroids of cells c0..c3.
//   c0..c3   - the four cells sharing the crossed edge; all must be valid,
//              otherwise the zero vector is returned.
//   solid0   - solidity of the cell at the start of the edge.
//   edgeAxis - axis of the crossed edge: 0 = x, 1 = y, anything else = z.
// The normal is taken from the triangle (c0, c1, c3); c2 is only checked for
// validity. It is flipped so that its component along edgeAxis is positive
// exactly when solid0 is true, i.e. it points from solid towards empty.
float3 computeQuadFaceNormal(int3 c0, int3 c1, int3 c2, int3 c3,
                             bool solid0, int edgeAxis) {
    const bool allValid = isCentroidValid(c0) && isCentroidValid(c1) &&
                          isCentroidValid(c2) && isCentroidValid(c3);
    if (!allValid) {
        return float3(0, 0, 0);
    }

    const float3 origin = readCentroidPos(c0);
    const float3 edgeA = readCentroidPos(c1) - origin;
    const float3 edgeB = readCentroidPos(c3) - origin;
    const float3 faceNormal = cross(edgeA, edgeB);

    float alongEdge = faceNormal.z;
    if (edgeAxis == 0) {
        alongEdge = faceNormal.x;
    } else if (edgeAxis == 1) {
        alongEdge = faceNormal.y;
    }

    // Keep the normal only if it already points the way the solid flag demands.
    const bool pointsPositive = alongEdge > 0.0;
    return (pointsPositive == solid0) ? faceNormal : -faceNormal;
}
// ── Smooth normal for a vertex at cell v ────────────────────────────
// Checks all 12 incident edges (4 per axis), computes face normals from
// centroid grid, averages them. All reads from grid only.
// Smooth normal for the vertex stored in cell v.
// For each axis, the four grid edges whose quads touch v start at
// v, v + U, v + W and v + U + W (U and W being the two other axes). Every
// such edge with a solid/empty transition contributes the area-weighted face
// normal of its quad. The sum is normalized; if it is (almost) zero the
// fallback (0, 1, 0) is returned. Reads the centroid grid only.
float3 computeSmoothNormal(int3 v) {
    // Per edge axis: edge direction and the two perpendicular directions.
    const int3 along[3] = { int3(1, 0, 0), int3(0, 1, 0), int3(0, 0, 1) };
    const int3 sideU[3] = { int3(0, 1, 0), int3(1, 0, 0), int3(1, 0, 0) };
    const int3 sideW[3] = { int3(0, 0, 1), int3(0, 0, 1), int3(0, 1, 0) };

    float3 normalSum = float3(0, 0, 0);

    [unroll] for (int axis = 0; axis < 3; ++axis) {
        const int3 u = sideU[axis];
        const int3 w = sideW[axis];

        // corner 0..3 -> edge start at v, v+U, v+W, v+U+W (in that order).
        [unroll] for (int corner = 0; corner < 4; ++corner) {
            const int3 edgeCell = v + (corner & 1) * u + (corner >> 1) * w;
            const bool nearSolid = isCellSolid(edgeCell);
            const bool farSolid = isCellSolid(edgeCell + along[axis]);

            if (nearSolid != farSolid) {
                // Same cell order main uses when it emits this edge's quad.
                normalSum += computeQuadFaceNormal(
                    edgeCell - u - w,
                    edgeCell - w,
                    edgeCell - u,
                    edgeCell,
                    nearSolid, axis);
            }
        }
    }

    const float magnitude = length(normalSum);
    if (magnitude > 0.0001) {
        return normalSum / magnitude;
    }
    return float3(0, 1, 0);
}
// ── Emit helpers ────────────────────────────────────────────────────
// Writes one vertex into outputVerts[slot].
//   pos / normal  - stored component-wise as floats.
//   primaryMat, secondaryMat, blendWeight - each truncated to 8 bits and
//                   packed into packedMat as [7:0], [15:8], [23:16].
// packedChunk receives the low 16 bits of push.chunkIndex.
void emitVertex(uint slot, float3 pos, float3 normal, uint primaryMat, uint secondaryMat, uint blendWeight) {
    const uint matBits = primaryMat & 0xFF;
    const uint secBits = (secondaryMat & 0xFF) << 8;
    const uint blendBits = (blendWeight & 0xFF) << 16;

    GPUSmoothVertex outVert;
    outVert.px = pos.x;
    outVert.py = pos.y;
    outVert.pz = pos.z;
    outVert.nx = normal.x;
    outVert.ny = normal.y;
    outVert.nz = normal.z;
    outVert.packedMat = matBits | secBits | blendBits;
    outVert.packedChunk = push.chunkIndex & 0xFFFF;

    outputVerts[slot] = outVert;
}
// Emits one quad as two triangles (six vertices) into outputVerts.
//   p, n     - corner positions and normals, indexed like the quad's cells.
//   mat, secMat, blendW - material data copied to all six vertices.
//   windingA - selects the corner order: true  -> (0,1,3) (0,3,2),
//                                        false -> (0,3,1) (0,2,3).
// Six slots are reserved atomically from the counter at byte 0 of
// vertCounter. If they would end past push.maxOutputVerts nothing is written,
// but the counter stays incremented, so it can exceed the capacity.
void emitQuad(float3 p[4], float3 n[4], uint mat, uint secMat, uint blendW, bool windingA) {
    uint firstSlot;
    vertCounter.InterlockedAdd(0, 6, firstSlot);
    if (firstSlot + 6 > push.maxOutputVerts) {
        return;
    }

    const uint orderA[6] = { 0, 1, 3, 0, 3, 2 };
    const uint orderB[6] = { 0, 3, 1, 0, 2, 3 };

    if (windingA) {
        [unroll] for (uint k = 0; k < 6; ++k) {
            emitVertex(firstSlot + k, p[orderA[k]], n[orderA[k]], mat, secMat, blendW);
        }
    } else {
        [unroll] for (uint k = 0; k < 6; ++k) {
            emitVertex(firstSlot + k, p[orderB[k]], n[orderB[k]], mat, secMat, blendW);
        }
    }
}
// ── Main ────────────────────────────────────────────────────────────
// Emits the quad crossing the grid edge cellPos -> cellPos + along, if there is one.
//   cellPos       - cell at the start of the edge.
//   cellSolid     - solidity of cellPos.
//   chunkWorldPos - added to the chunk-local centroids to get output positions.
//   along         - unit step along the edge axis.
//   sideU, sideW  - unit steps along the two perpendicular axes; the quad's
//                   cells are cellPos - U - W, cellPos - W, cellPos - U, cellPos.
//   flipWinding   - inverts the winding choice (used for the Y axis).
// Nothing is emitted when both edge cells have the same solidity or when any
// of the four quad cells has no valid centroid. Material data is taken from
// cellPos itself.
void emitEdgeQuad(int3 cellPos, bool cellSolid, float3 chunkWorldPos,
                  int3 along, int3 sideU, int3 sideW, bool flipWinding)
{
    // Only edges with a solid/empty transition carry a surface quad.
    if (cellSolid == isCellSolid(cellPos + along)) return;

    int3 cells[4] = {
        cellPos - sideU - sideW,
        cellPos - sideW,
        cellPos - sideU,
        cellPos
    };
    const bool allValid = isCentroidValid(cells[0]) && isCentroidValid(cells[1]) &&
                          isCentroidValid(cells[2]) && isCentroidValid(cells[3]);
    if (!allValid) return;

    float3 p[4], n[4];
    [loop] for (uint i = 0; i < 4; i++) {
        p[i] = chunkWorldPos + readCentroidPos(cells[i]);
        n[i] = computeSmoothNormal(cells[i]);
    }

    // Winding depends only on which side of the edge is solid (plus the
    // per-axis flip); no geometric face normal is needed here.
    const bool windingA = flipWinding ? cellSolid : !cellSolid;

    const uint packed = readGridPacked(cells[3]);
    const uint mat = packed & 0xFF;
    const uint secMat = (packed >> 8) & 0xFF;
    const uint blendW = (packed >> 16) & 0xFF;
    emitQuad(p, n, mat, secMat, blendW, windingA);
}

// Entry point: one thread per cell in [0..31]^3. Each thread inspects the
// three grid edges leaving its cell in +X, +Y and +Z (in that order) and
// emits a quad for every edge with a solid/empty transition.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 8)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    // Discard threads outside the chunk's cell range.
    if (any(DTid >= CSIZE)) return;

    const int3 cellPos = int3(DTid);
    const bool cellSolid = isCellSolid(cellPos);
    const float3 chunkWorldPos = chunkInfo[push.chunkIndex].worldPos.xyz;

    const int3 axisX = int3(1, 0, 0);
    const int3 axisY = int3(0, 1, 0);
    const int3 axisZ = int3(0, 0, 1);

    // X-edge: cellPos -> cellPos + (1,0,0)
    emitEdgeQuad(cellPos, cellSolid, chunkWorldPos, axisX, axisY, axisZ, false);
    // Y-edge: cellPos -> cellPos + (0,1,0); Y-axis winding flip
    emitEdgeQuad(cellPos, cellSolid, chunkWorldPos, axisY, axisX, axisZ, true);
    // Z-edge: cellPos -> cellPos + (0,0,1)
    emitEdgeQuad(cellPos, cellSolid, chunkWorldPos, axisZ, axisX, axisY, false);
}