From 626fbaea80b10c5c6a2d1ef96e18bc03ba1e2dc3 Mon Sep 17 00:00:00 2001 From: Samuel Bouchet Date: Wed, 1 Apr 2026 20:35:42 +0200 Subject: [PATCH] Fix smooth Surface Nets rendering: eliminate faceting, fix blocky junction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove geoN (ddx/ddy) from smooth PS entirely — use smooth interpolated normal N for all triplanar sampling (albedo, heightmap, normal map). geoN changes discontinuously at triangle edges, causing per-triangle faceting in texture weights and normal perturbation. - Tune consistency-based vertex normal blend to smoothstep(0.70, 0.90): snaps to face normal at 90° boundaries (seamless blocky join) while preserving smooth normals on curved terrain. - Unify all 3 edge axes (X/Y/Z) to same smoothstep formula (was mixed smoothstep + pow4). - Remove grass-specific hardcoded shading from both PS (side darkening, warm shift, ambient boost) — will be data-driven per-material later. - Remove CPU SmoothMesher code (GPU-only path). - Document all findings in TROUBLESHOOTING.md with calibration table. 
--- TROUBLESHOOTING.md | 61 ++++ shaders/voxelPS.hlsl | 18 -- shaders/voxelSmoothCS.hlsl | 157 ++++++----- shaders/voxelSmoothPS.hlsl | 41 ++- src/voxel/VoxelMesher.cpp | 537 +----------------------------------- src/voxel/VoxelMesher.h | 21 -- src/voxel/VoxelRenderer.cpp | 155 +---------- src/voxel/VoxelRenderer.h | 15 +- src/voxel/VoxelWorld.h | 5 +- 9 files changed, 186 insertions(+), 824 deletions(-) diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 54db1fd..f478c3a 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -19,6 +19,7 @@ - [CreateBuffer avec capacity > data size](#createbuffer-avec-capacity--data-size) - [BLAS/TLAS per-frame recreation — VRAM leak](#blastlas-per-frame-recreation--vram-leak) - [Diagnostics et debugging](#diagnostics-et-debugging) +- [Smooth Surface Nets — Rendu facetté et jointure blocky](#smooth-surface-nets--rendu-facetté-et-jointure-blocky) - [Gestion des resource states DX12 (buffers)](#gestion-des-resource-states-dx12-buffers) --- @@ -320,6 +321,66 @@ dev->BuildRaytracingAccelerationStructure(&blas, cmd, nullptr); --- +## Smooth Surface Nets — Rendu facetté et jointure blocky + +### Problème 1 : Rendu smooth facetté malgré normales lisses + +**Symptôme** : en mode debug (FLAT, NdotL, NORMAL), la surface smooth est parfaitement lisse. Mais en rendu final (ALL), elle apparaît facettée avec des arêtes de triangles visibles. + +**Cause racine** : `geoN` (geometric normal via `ddx(worldPos)`/`ddy(worldPos)`) était utilisé pour le triplanar sampling (poids de projection) ET le normal mapping. Cette valeur est la **face normal du triangle à l'écran** — elle change de manière **discontinue** à chaque arête de triangle. Résultat : + +1. **Poids triplanar discontinus** → la texture saute aux arêtes (coutures visibles) +2. 
**Normal map discontinu** → la perturbation normale diffère par triangle → NdotL facetté + +Les modes debug étaient lisses car ils utilisaient `flatN` (smooth normal **avant** perturbation normal map), pas le `N` perturbé. + +**Correction** : utiliser `N` (smooth interpolated normal) pour **tout** le triplanar dans `voxelSmoothPS.hlsl` : +- Poids triplanar albedo/heightmap → `N` (pas `geoN`) +- Normal map sampling → `N` (pas `geoN`) +- `geoN` n'est plus calculé/utilisé du tout + +`N` varie continûment entre vertices → transitions lisses partout. + +### Problème 2 : Jointure visible smooth/blocky + +**Symptôme** : contraste visible entre faces smooth et blocky adjacentes, quasi-coplanaires. + +**Causes racines** (cumulatives) : + +1. **Traitements per-material dans un seul PS** — le blocky PS avait un shading spécifique grass (side darkening 60%, warm shift chromatique, ambient boost ×1.15) absent du smooth PS. Pour une face grass +X, ça créait ~40% d'écart de luminosité. + +2. **Smooth normals biaisées aux frontières** — les vertex normals aux arêtes 90° (mur smooth → sol) étaient moyennées entre faces perpendiculaires (consistency ≈ 0.707), produisant une normale biaisée vers +Y au lieu de +X pur. + +**Correction** : +- **Supprimer les traitements per-material hardcodés** des deux PS. Quand on aura besoin de shading par matériau, le rendre data-driven et l'appliquer identiquement dans les deux shaders. +- **Consistency-based vertex normal blend** dans `voxelSmoothCS.hlsl` : métrique `|Σfn| / Σ|fn|` qui mesure l'accord des face normals incidentes. Les vertices à faible consistency (arêtes nettes, frontières) reçoivent la face normal pure ; les vertices à haute consistency (surfaces courbes) gardent la smooth normal. 
+ +### Calibration du seuil de consistency + +Le seuil `smoothstep(low, high, consistency)` contrôle le compromis lisse/net : + +| Seuil | con=0.707 (90° edge) | con=0.85 (courbe) | con=0.95 (pente) | Résultat | +|---|---|---|---|---| +| `(0.85, 1.0)` | t=0 face ✓ | t=0 face ✗ | t=0.74 ≈ 26% face ✗ | Trop agressif, tout facetté | +| `(0.60, 0.85)` | t=0.39 ≈ 61% face | t=1.0 smooth ✓ | t=1.0 smooth ✓ | Frontière visible, intérieur lisse | +| `(0.70, 0.90)` | t≈0 face ✓ | t=0.84 smooth ✓ | t=1.0 smooth ✓ | **Bon compromis** | + +**Valeur retenue : `smoothstep(0.70, 0.90)`** — les arêtes 90° (con ≤ 0.707) reçoivent 100% face normal (jointure nette avec blocky), les courbes modérées (con > 0.85) restent smooth. + +### Normal map strength + +Le smooth PS utilise `nmStrength * 0.7` (vs `nmStrength * 1.0` pour blocky). Les surfaces courbes nécessitent des normal maps atténuées pour que les perturbations ne cassent pas la continuité visuelle du smooth shading. + +### Règles + +- **Toute modification de lighting/texturing** dans `voxelPS.hlsl` doit être portée dans `voxelSmoothPS.hlsl` (et vice-versa) +- **Ne JAMAIS utiliser `geoN`** (ddx/ddy) dans le smooth PS pour le triplanar ou le normal mapping — utiliser `N` exclusivement +- Les deux PS doivent produire un résultat identique sur des faces coplanaires de même matériau + +**Fichiers** : `shaders/voxelSmoothCS.hlsl` (consistency blend), `shaders/voxelSmoothPS.hlsl` (triplanar + normal map), `shaders/voxelPS.hlsl` (blocky reference) + +--- + ## Gestion des resource states DX12 (buffers) **Wicked Engine ne fait AUCUN tracking automatique d'état pour les buffers.** Les `GPUBarrier::Buffer(buf, before, after)` sont passées directement à D3D12 sans validation. 
**Le `state_before` DOIT correspondre à l'état DX12 réel, sinon → DXGI_ERROR_INVALID_CALL.** diff --git a/shaders/voxelPS.hlsl b/shaders/voxelPS.hlsl index 9be2527..b73ee27 100644 --- a/shaders/voxelPS.hlsl +++ b/shaders/voxelPS.hlsl @@ -356,24 +356,6 @@ PSOutput main(PSInput input) float3 ambient = lerp(groundAmbient.rgb, skyAmbient.rgb, hemiLerp); float3 diffuse = sunColor.rgb * NdotL; - // Grass-specific shading (Wonderbox style) - bool isGrass = (texIndex == 0); // material 1 = grass = texture layer 0 - if (isGrass) { - // Vertical face darkening: use FLAT normal for consistency - float verticalDarken = saturate(abs(flatN.y)); // 1=top, 0=side - float sideFactor = lerp(0.60, 1.0, verticalDarken); // sides at 60% brightness - albedo *= sideFactor; - - // Subtle warm shift: sunlit grass slightly warmer - if (NdotL > 0.0) { - float3 warmShift = float3(0.08, 0.05, -0.03) * NdotL; - diffuse += warmShift; - } - - // Boost ambient for grass: inter-reflection from dense foliage - ambient *= 1.15; - } - // ── Debug lighting modes (F9 cycle) ── uint dbgLight = (uint)toneMapParams.w; if (dbgLight == 2) { diff --git a/shaders/voxelSmoothCS.hlsl b/shaders/voxelSmoothCS.hlsl index 0ddb7f7..f4bfb0d 100644 --- a/shaders/voxelSmoothCS.hlsl +++ b/shaders/voxelSmoothCS.hlsl @@ -80,11 +80,25 @@ float3 computeQuadFaceNormal(int3 c0, int3 c1, int3 c2, int3 c3, return fn; // area-weighted (not normalized) } -// ── Smooth normal for a vertex at cell v ──────────────────────────── +// ── Smooth normal + consistency for a vertex at cell v ────────────── // Checks all 12 incident edges (4 per axis), computes face normals from -// centroid grid, averages them. All reads from grid only. -float3 computeSmoothNormal(int3 v) { +// centroid grid, averages them. 
Also returns a consistency metric: +// consistency = |sum(fn)| / sum(|fn|) +// = 1.0 when all face normals agree (flat surface) +// ≈ 0.707 at a 90° edge (two perpendicular faces) +// → 0 when faces cancel out +// Used at emission time to blend between smooth normal (interior) and +// face normal (edge vertices). +float3 computeSmoothNormal(int3 v, out float consistency) { float3 accum = float3(0, 0, 0); + float totalMag = 0; + + // Helper macro: accumulate one quad's face normal + its magnitude + #define ACCUM_QUAD(c0,c1,c2,c3,solid,axis) { \ + float3 fn_ = computeQuadFaceNormal(c0,c1,c2,c3,solid,axis); \ + accum += fn_; \ + totalMag += length(fn_); \ + } // X-edges: at (v.x, v.y+dy, v.z+dz) for dy,dz in {0,1} { @@ -97,30 +111,14 @@ float3 computeSmoothNormal(int3 v) { bool sv_11 = isCellSolid(int3(v.x, v.y+1, v.z+1)); bool sv_11_x1 = isCellSolid(int3(v.x+1, v.y+1, v.z+1)); - // Edge (v.x, v.y, v.z) - if (sv != sv_x1) { - accum += computeQuadFaceNormal( - v + int3(0,-1,-1), v + int3(0,0,-1), - v + int3(0,-1,0), v, sv, 0); - } - // Edge (v.x, v.y+1, v.z) - if (sv_01 != sv_01_x1) { - accum += computeQuadFaceNormal( - int3(v.x, v.y, v.z-1), int3(v.x, v.y+1, v.z-1), - v, int3(v.x, v.y+1, v.z), sv_01, 0); - } - // Edge (v.x, v.y, v.z+1) - if (sv_10 != sv_10_x1) { - accum += computeQuadFaceNormal( - int3(v.x, v.y-1, v.z), v, - int3(v.x, v.y-1, v.z+1), int3(v.x, v.y, v.z+1), sv_10, 0); - } - // Edge (v.x, v.y+1, v.z+1) - if (sv_11 != sv_11_x1) { - accum += computeQuadFaceNormal( - v, int3(v.x, v.y+1, v.z), - int3(v.x, v.y, v.z+1), int3(v.x, v.y+1, v.z+1), sv_11, 0); - } + if (sv != sv_x1) + ACCUM_QUAD(v+int3(0,-1,-1), v+int3(0,0,-1), v+int3(0,-1,0), v, sv, 0) + if (sv_01 != sv_01_x1) + ACCUM_QUAD(int3(v.x,v.y,v.z-1), int3(v.x,v.y+1,v.z-1), v, int3(v.x,v.y+1,v.z), sv_01, 0) + if (sv_10 != sv_10_x1) + ACCUM_QUAD(int3(v.x,v.y-1,v.z), v, int3(v.x,v.y-1,v.z+1), int3(v.x,v.y,v.z+1), sv_10, 0) + if (sv_11 != sv_11_x1) + ACCUM_QUAD(v, int3(v.x,v.y+1,v.z), int3(v.x,v.y,v.z+1), 
int3(v.x,v.y+1,v.z+1), sv_11, 0) } // Y-edges: at (v.x+dx, v.y, v.z+dz) for dx,dz in {0,1} @@ -134,26 +132,14 @@ float3 computeSmoothNormal(int3 v) { bool sv_11 = isCellSolid(int3(v.x+1, v.y, v.z+1)); bool sv_11_y1 = isCellSolid(int3(v.x+1, v.y+1, v.z+1)); - if (sv != sv_y1) { - accum += computeQuadFaceNormal( - v + int3(-1,0,-1), v + int3(0,0,-1), - v + int3(-1,0,0), v, sv, 1); - } - if (sv_10 != sv_10_y1) { - accum += computeQuadFaceNormal( - int3(v.x, v.y, v.z-1), int3(v.x+1, v.y, v.z-1), - v, int3(v.x+1, v.y, v.z), sv_10, 1); - } - if (sv_01 != sv_01_y1) { - accum += computeQuadFaceNormal( - int3(v.x-1, v.y, v.z), v, - int3(v.x-1, v.y, v.z+1), int3(v.x, v.y, v.z+1), sv_01, 1); - } - if (sv_11 != sv_11_y1) { - accum += computeQuadFaceNormal( - v, int3(v.x+1, v.y, v.z), - int3(v.x, v.y, v.z+1), int3(v.x+1, v.y, v.z+1), sv_11, 1); - } + if (sv != sv_y1) + ACCUM_QUAD(v+int3(-1,0,-1), v+int3(0,0,-1), v+int3(-1,0,0), v, sv, 1) + if (sv_10 != sv_10_y1) + ACCUM_QUAD(int3(v.x,v.y,v.z-1), int3(v.x+1,v.y,v.z-1), v, int3(v.x+1,v.y,v.z), sv_10, 1) + if (sv_01 != sv_01_y1) + ACCUM_QUAD(int3(v.x-1,v.y,v.z), v, int3(v.x-1,v.y,v.z+1), int3(v.x,v.y,v.z+1), sv_01, 1) + if (sv_11 != sv_11_y1) + ACCUM_QUAD(v, int3(v.x+1,v.y,v.z), int3(v.x,v.y,v.z+1), int3(v.x+1,v.y,v.z+1), sv_11, 1) } // Z-edges: at (v.x+dx, v.y+dy, v.z) for dx,dy in {0,1} @@ -167,30 +153,21 @@ float3 computeSmoothNormal(int3 v) { bool sv_11 = isCellSolid(int3(v.x+1, v.y+1, v.z)); bool sv_11_z1 = isCellSolid(int3(v.x+1, v.y+1, v.z+1)); - if (sv != sv_z1) { - accum += computeQuadFaceNormal( - v + int3(-1,-1,0), v + int3(0,-1,0), - v + int3(-1,0,0), v, sv, 2); - } - if (sv_10 != sv_10_z1) { - accum += computeQuadFaceNormal( - int3(v.x, v.y-1, v.z), int3(v.x+1, v.y-1, v.z), - v, int3(v.x+1, v.y, v.z), sv_10, 2); - } - if (sv_01 != sv_01_z1) { - accum += computeQuadFaceNormal( - int3(v.x-1, v.y, v.z), v, - int3(v.x-1, v.y+1, v.z), int3(v.x, v.y+1, v.z), sv_01, 2); - } - if (sv_11 != sv_11_z1) { - accum += 
computeQuadFaceNormal( - v, int3(v.x+1, v.y, v.z), - int3(v.x, v.y+1, v.z), int3(v.x+1, v.y+1, v.z), sv_11, 2); - } + if (sv != sv_z1) + ACCUM_QUAD(v+int3(-1,-1,0), v+int3(0,-1,0), v+int3(-1,0,0), v, sv, 2) + if (sv_10 != sv_10_z1) + ACCUM_QUAD(int3(v.x,v.y-1,v.z), int3(v.x+1,v.y-1,v.z), v, int3(v.x+1,v.y,v.z), sv_10, 2) + if (sv_01 != sv_01_z1) + ACCUM_QUAD(int3(v.x-1,v.y,v.z), v, int3(v.x-1,v.y+1,v.z), int3(v.x,v.y+1,v.z), sv_01, 2) + if (sv_11 != sv_11_z1) + ACCUM_QUAD(v, int3(v.x+1,v.y,v.z), int3(v.x,v.y+1,v.z), int3(v.x+1,v.y+1,v.z), sv_11, 2) } + #undef ACCUM_QUAD - float len = length(accum); - return (len > 0.0001) ? accum / len : float3(0, 1, 0); + float accumLen = length(accum); + // consistency: 1.0 = all faces agree, <1.0 = diverging face directions + consistency = (totalMag > 0.0001) ? accumLen / totalMag : 1.0; + return (accumLen > 0.0001) ? accum / accumLen : float3(0, 1, 0); } // ── Emit helpers ──────────────────────────────────────────────────── @@ -249,16 +226,30 @@ void main(uint3 DTid : SV_DispatchThreadID) if (isCentroidValid(cells[0]) && isCentroidValid(cells[1]) && isCentroidValid(cells[2]) && isCentroidValid(cells[3])) { float3 p[4], n[4]; + float con[4]; [loop] for (uint i = 0; i < 4; i++) p[i] = chunkWorldPos + readCentroidPos(cells[i]); [loop] for (uint i = 0; i < 4; i++) - n[i] = computeSmoothNormal(cells[i]); + n[i] = computeSmoothNormal(cells[i], con[i]); float3 fn = cross(p[1] - p[0], p[3] - p[0]); int s = cellSolid ? 
+1 : -1; if ((fn.x > 0.0) != (s > 0)) fn = -fn; bool windingA = !cellSolid; + // Consistency-based blend: sharp edge vertices → face normal, curved → smooth + // consistency ≈ 1.0 = flat, ≈ 0.707 = 90° edge, < 0.5 = sharp corner + // smoothstep(0.70, 0.90): snaps to face normal at 90° boundaries (con<0.70) + // for seamless join with blocky, preserves smooth for terrain curves (con>0.90) + float fnLen = length(fn); + if (fnLen > 0.0001) { + float3 fnN = fn / fnLen; + [loop] for (uint i = 0; i < 4; i++) { + float t = smoothstep(0.70, 0.90, con[i]); + n[i] = normalize(lerp(fnN, n[i], t)); + } + } + uint packed = readGridPacked(cells[3]); uint mat = packed & 0xFF; uint secMat = (packed >> 8) & 0xFF; @@ -281,10 +272,11 @@ void main(uint3 DTid : SV_DispatchThreadID) if (isCentroidValid(cells[0]) && isCentroidValid(cells[1]) && isCentroidValid(cells[2]) && isCentroidValid(cells[3])) { float3 p[4], n[4]; + float con[4]; [loop] for (uint i = 0; i < 4; i++) p[i] = chunkWorldPos + readCentroidPos(cells[i]); [loop] for (uint i = 0; i < 4; i++) - n[i] = computeSmoothNormal(cells[i]); + n[i] = computeSmoothNormal(cells[i], con[i]); float3 fn = cross(p[1] - p[0], p[3] - p[0]); int s = cellSolid ? 
+1 : -1; @@ -292,6 +284,16 @@ void main(uint3 DTid : SV_DispatchThreadID) bool windingA = !cellSolid; windingA = !windingA; // Y-axis winding flip + // Consistency-based blend (same formula as X-edge) + float fnLen = length(fn); + if (fnLen > 0.0001) { + float3 fnN = fn / fnLen; + [loop] for (uint i = 0; i < 4; i++) { + float t = smoothstep(0.70, 0.90, con[i]); + n[i] = normalize(lerp(fnN, n[i], t)); + } + } + uint packed = readGridPacked(cells[3]); uint mat = packed & 0xFF; uint secMat = (packed >> 8) & 0xFF; @@ -314,16 +316,27 @@ void main(uint3 DTid : SV_DispatchThreadID) if (isCentroidValid(cells[0]) && isCentroidValid(cells[1]) && isCentroidValid(cells[2]) && isCentroidValid(cells[3])) { float3 p[4], n[4]; + float con[4]; [loop] for (uint i = 0; i < 4; i++) p[i] = chunkWorldPos + readCentroidPos(cells[i]); [loop] for (uint i = 0; i < 4; i++) - n[i] = computeSmoothNormal(cells[i]); + n[i] = computeSmoothNormal(cells[i], con[i]); float3 fn = cross(p[1] - p[0], p[3] - p[0]); int s = cellSolid ? +1 : -1; if ((fn.z > 0.0) != (s > 0)) fn = -fn; bool windingA = !cellSolid; + // Consistency-based blend (same formula as X-edge) + float fnLen = length(fn); + if (fnLen > 0.0001) { + float3 fnN = fn / fnLen; + [loop] for (uint i = 0; i < 4; i++) { + float t = smoothstep(0.70, 0.90, con[i]); + n[i] = normalize(lerp(fnN, n[i], t)); + } + } + uint packed = readGridPacked(cells[3]); uint mat = packed & 0xFF; uint secMat = (packed >> 8) & 0xFF; diff --git a/shaders/voxelSmoothPS.hlsl b/shaders/voxelSmoothPS.hlsl index c411a0d..2469ca9 100644 --- a/shaders/voxelSmoothPS.hlsl +++ b/shaders/voxelSmoothPS.hlsl @@ -124,14 +124,11 @@ PSOutput main(PSInput input) { PSOutput output; float3 N = normalize(input.normal); // smooth normal (for lighting) - // Geometric normal from screen-space derivatives of worldPos. - // This is the true triangle face normal — use it for triplanar weights - // to avoid texture stretching caused by smooth normal interpolation. 
- float3 dpx = ddx(input.worldPos); - float3 dpy = ddy(input.worldPos); - float3 geoN = normalize(cross(dpx, dpy)); - // Ensure geometric normal faces same hemisphere as smooth normal - if (dot(geoN, N) < 0.0) geoN = -geoN; + // NOTE: geoN (ddx/ddy geometric normal) is NOT used for triplanar sampling + // or normal mapping on smooth surfaces. It changes abruptly at triangle edges, + // causing per-triangle faceting in texture weights, normal perturbation, and + // therefore lighting (NdotL). All triplanar operations use N (smooth interpolated + // normal) which varies continuously across vertices → seamless result. float tiling = textureTiling; @@ -209,13 +206,13 @@ PSOutput main(PSInput input) { float3 albedo; if (uBlend || vBlend) { - float4 mainTex = sampleTriplanarRGBA(input.worldPos, geoN, selfTexIdx, tiling); + float4 mainTex = sampleTriplanarRGBA(input.worldPos, N, selfTexIdx, tiling); float3 result = mainTex.rgb; float sharpness = 16.0; if (uBlend) { uint uTexIdx = clamp(uNeighborMat - 1u, 0u, 5u); - float4 uTex = sampleTriplanarRGBA(input.worldPos, geoN, uTexIdx, tiling); + float4 uTex = sampleTriplanarRGBA(input.worldPos, N, uTexIdx, tiling); float bias; if (uNeighResists) { bias = 0.5 - uWeight * 1.6; @@ -230,7 +227,7 @@ PSOutput main(PSInput input) { if (vBlend) { uint vTexIdx = clamp(vNeighborMat - 1u, 0u, 5u); - float4 vTex = sampleTriplanarRGBA(input.worldPos, geoN, vTexIdx, tiling); + float4 vTex = sampleTriplanarRGBA(input.worldPos, N, vTexIdx, tiling); float bias; if (vNeighResists) { bias = 0.5 - vWeight * 1.6; @@ -245,17 +242,24 @@ PSOutput main(PSInput input) { albedo = result; } else { - albedo = sampleTriplanar(input.worldPos, geoN, selfTexIdx, tiling); + albedo = sampleTriplanar(input.worldPos, N, selfTexIdx, tiling); } // ── Normal map perturbation ── float3 flatN = N; // preserve for ambient float nmStrength = toneMapParams.z; if (nmStrength > 0.0) { - float3 perturbedN = sampleTriplanarNormal(input.worldPos, geoN, selfTexIdx, tiling); - N 
= normalize(lerp(N, perturbedN, nmStrength * 0.7)); // lighter on smooth + float3 perturbedN = sampleTriplanarNormal(input.worldPos, N, selfTexIdx, tiling); + N = normalize(lerp(N, perturbedN, nmStrength * 0.7)); // lighter on smooth for softer transitions } + // ── Lighting ── + float3 L = normalize(-sunDirection.xyz); + float NdotL = max(dot(N, L), 0.0); + float hemiLerp = flatN.y * 0.5 + 0.5; + float3 ambient = lerp(groundAmbient.rgb, skyAmbient.rgb, hemiLerp); + float3 diffuse = sunColor.rgb * NdotL; + // ── Debug lighting modes (F9 cycle) ── uint dbgLight = (uint)toneMapParams.w; if (dbgLight == 2) { @@ -275,7 +279,7 @@ PSOutput main(PSInput input) { return output; } if (dbgLight == 4) { - // NdotL only: grayscale NdotL with geometric normal (no normal map) + // NdotL only: grayscale NdotL with flat normal (no normal map) float flatNdotL = max(dot(flatN, normalize(-sunDirection.xyz)), 0.0); output.color = float4(flatNdotL, flatNdotL, flatNdotL, 1.0); output.normal = float4(flatN, 0.0); @@ -288,12 +292,7 @@ PSOutput main(PSInput input) { return output; } - // Lighting: flat normal for ambient (consistent), perturbed for NdotL (detail) - float3 L = normalize(-sunDirection.xyz); - float NdotL = max(dot(N, L), 0.0); - float hemiLerp = flatN.y * 0.5 + 0.5; - float3 ambient = lerp(groundAmbient.rgb, skyAmbient.rgb, hemiLerp); - float3 color = albedo * (sunColor.rgb * NdotL + ambient); + float3 color = albedo * (ambient + diffuse); // ── Rim light ── float3 V = normalize(cameraPosition.xyz - input.worldPos); diff --git a/src/voxel/VoxelMesher.cpp b/src/voxel/VoxelMesher.cpp index 8239323..bae5b45 100644 --- a/src/voxel/VoxelMesher.cpp +++ b/src/voxel/VoxelMesher.cpp @@ -243,538 +243,11 @@ uint8_t VoxelMesher::calcAO(const VoxelWorld& world, const ChunkPos& cpos, } // ══════════════════════════════════════════════════════════════════ -// ── Naive Surface Nets Mesher (Phase 5) ───────────────────────── +// ── Smooth meshing (Phase 5) 
──────────────────────────────────── // ══════════════════════════════════════════════════════════════════ -// -// Algorithm: -// 1. Compute SDF for each voxel: smooth solid = -1, empty = +1 -// Non-smooth solid voxels act as hard walls (SDF crushed to -1). -// 2. For each cell on the surface (SDF sign differs from at least one neighbor), -// place a vertex at the centroid of edge crossings. -// 3. For each edge (pair of adjacent cells) with a sign change, -// emit a quad connecting the 4 cells that share that edge, then split to 2 triangles. -// 4. Normals derived from SDF gradient (central differences). - -// Padded grid: +2 border for cross-chunk SDF lookups and neighbor smooth detection -static constexpr int PAD = 2; -static constexpr int GRID = CHUNK_SIZE + 2 * PAD; // 36 - -static inline int gridIdx(int x, int y, int z) { - return (x + PAD) + (y + PAD) * GRID + (z + PAD) * GRID * GRID; -} - -// Helper: read voxel data at chunk-local coords (with cross-chunk fallback) -static VoxelData readVoxel(const Chunk& chunk, const VoxelWorld& world, int x, int y, int z) { - if (chunk.isInBounds(x, y, z)) - return chunk.at(x, y, z); - return world.getVoxel( - chunk.pos.x * CHUNK_SIZE + x, - chunk.pos.y * CHUNK_SIZE + y, - chunk.pos.z * CHUNK_SIZE + z); -} - -float SmoothMesher::computeSDF(const Chunk& chunk, const VoxelWorld& world, - int x, int y, int z) { - VoxelData v = readVoxel(chunk, world, x, y, z); - if (v.isEmpty()) return 1.0f; // empty → positive SDF - return -1.0f; // any solid → negative SDF -} - -void SmoothMesher::computeNormal(const Chunk& chunk, const VoxelWorld& world, - int x, int y, int z, - float& nx, float& ny, float& nz) { - // Central differences of the SDF - float dx = computeSDF(chunk, world, x+1, y, z) - computeSDF(chunk, world, x-1, y, z); - float dy = computeSDF(chunk, world, x, y+1, z) - computeSDF(chunk, world, x, y-1, z); - float dz = computeSDF(chunk, world, x, y, z+1) - computeSDF(chunk, world, x, y, z-1); - - float len = std::sqrt(dx*dx 
+ dy*dy + dz*dz); - if (len > 0.0001f) { - nx = dx / len; - ny = dy / len; - nz = dz / len; - } else { - nx = 0.0f; ny = 1.0f; nz = 0.0f; - } -} - -// Thread-local scratch buffers to avoid per-chunk allocation overhead. -// Each worker thread gets its own set, eliminating malloc/free thrashing. -struct SmoothScratch { - float sdf[GRID * GRID * GRID]; - uint8_t smoothGrid[GRID * GRID * GRID]; - uint8_t smoothNear[GRID * GRID * GRID]; // dilated: 1 if smooth OR face-adjacent to smooth - VoxelData voxelGrid[GRID * GRID * GRID]; - int32_t vertexMap[33 * 33 * 33]; // VERT_RANGE³ -}; -static thread_local SmoothScratch* tls_scratch = nullptr; - -uint32_t SmoothMesher::meshChunk(Chunk& chunk, const VoxelWorld& world) { - chunk.smoothVertices.clear(); - chunk.hasSmooth = false; - - // ── Early exit: skip chunks far from any smooth voxels ────── - // Check this chunk + 26 neighbors for containsSmooth flag. - // This avoids the expensive 36³ grid fill for ~70% of chunks. - { - bool nearSmooth = chunk.containsSmooth; - if (!nearSmooth) { - for (int dz = -1; dz <= 1 && !nearSmooth; dz++) - for (int dy = -1; dy <= 1 && !nearSmooth; dy++) - for (int dx = -1; dx <= 1 && !nearSmooth; dx++) { - if (dx == 0 && dy == 0 && dz == 0) continue; - const Chunk* nc = world.getChunk( - ChunkPos{chunk.pos.x + dx, chunk.pos.y + dy, chunk.pos.z + dz}); - if (nc && nc->containsSmooth) nearSmooth = true; - } - } - if (!nearSmooth) return 0; - } - - // Allocate thread-local scratch once per thread (persists across calls) - if (!tls_scratch) tls_scratch = new SmoothScratch(); - auto& scratch = *tls_scratch; - - // ── Step 1: Build SDF grid + smooth flag grid + voxel cache ── - // PAD=2 so we have SDF data for cells at [-1..CHUNK_SIZE] (all 8 corners accessible) - // Also build a "isSmooth" grid for the same range to detect proximity to smooth voxels. - // voxelGrid caches VoxelData to avoid repeated cross-chunk hashmap lookups later. 
- float* sdf = scratch.sdf; - uint8_t* smoothGrid = scratch.smoothGrid; - VoxelData* voxelGrid = scratch.voxelGrid; - constexpr int GRID3 = GRID * GRID * GRID; - std::memset(smoothGrid, 0, GRID3); - // SDF defaults to 1.0f (empty) — fill below - for (int i = 0; i < GRID3; i++) sdf[i] = 1.0f; - bool anySmooth = false; - - // Pre-cache neighbor chunk pointers for fast cross-chunk access - const Chunk* neighborChunks[3][3][3] = {}; - for (int dz = -1; dz <= 1; dz++) - for (int dy = -1; dy <= 1; dy++) - for (int dx = -1; dx <= 1; dx++) { - neighborChunks[dx+1][dy+1][dz+1] = world.getChunk( - ChunkPos{chunk.pos.x + dx, chunk.pos.y + dy, chunk.pos.z + dz}); - } - - // Helper: fast voxel read using cached neighbor chunk pointers - auto readVoxelFast = [&](int x, int y, int z) -> VoxelData { - if (x >= 0 && x < CHUNK_SIZE && y >= 0 && y < CHUNK_SIZE && z >= 0 && z < CHUNK_SIZE) - return chunk.at(x, y, z); - // Determine which neighbor chunk - int cx = (x < 0) ? 0 : (x >= CHUNK_SIZE) ? 2 : 1; - int cy = (y < 0) ? 0 : (y >= CHUNK_SIZE) ? 2 : 1; - int cz = (z < 0) ? 0 : (z >= CHUNK_SIZE) ? 2 : 1; - const Chunk* nc = neighborChunks[cx][cy][cz]; - if (!nc) return VoxelData{}; // empty if chunk not loaded - int lx = ((x % CHUNK_SIZE) + CHUNK_SIZE) % CHUNK_SIZE; - int ly = ((y % CHUNK_SIZE) + CHUNK_SIZE) % CHUNK_SIZE; - int lz = ((z % CHUNK_SIZE) + CHUNK_SIZE) % CHUNK_SIZE; - return nc->at(lx, ly, lz); - }; - - for (int z = -PAD; z < CHUNK_SIZE + PAD; z++) { - for (int y = -PAD; y < CHUNK_SIZE + PAD; y++) { - for (int x = -PAD; x < CHUNK_SIZE + PAD; x++) { - int gi = gridIdx(x, y, z); - VoxelData v = readVoxelFast(x, y, z); - voxelGrid[gi] = v; - sdf[gi] = v.isEmpty() ? 
1.0f : -1.0f; - if (v.isSmooth()) { - smoothGrid[gi] = 1; - // Only need anySmooth for this chunk's own voxels - if (chunk.isInBounds(x, y, z)) anySmooth = true; - } - } - } - } - - // Also check 1 beyond the chunk (neighbor chunks may have smooth voxels that - // affect cells at the chunk boundary) - if (!anySmooth) { - // Check if any neighbor voxels just outside the chunk are smooth - for (int z = -1; z <= CHUNK_SIZE && !anySmooth; z++) - for (int y = -1; y <= CHUNK_SIZE && !anySmooth; y++) - for (int x = -1; x <= CHUNK_SIZE && !anySmooth; x++) { - if (chunk.isInBounds(x, y, z)) continue; // already checked - if (smoothGrid[gridIdx(x, y, z)]) anySmooth = true; - } - } - - if (!anySmooth) return 0; - chunk.hasSmooth = true; - - // ── Step 1b: Dilate smoothGrid → smoothNear ────────────────── - // Pre-compute "smooth or face-adjacent to smooth" to reduce the - // per-cell hasSmooth check from 56 lookups to 8 lookups. - uint8_t* smoothNear = scratch.smoothNear; - std::memcpy(smoothNear, smoothGrid, GRID3); - for (int z = -PAD + 1; z < CHUNK_SIZE + PAD - 1; z++) - for (int y = -PAD + 1; y < CHUNK_SIZE + PAD - 1; y++) - for (int x = -PAD + 1; x < CHUNK_SIZE + PAD - 1; x++) { - if (smoothGrid[gridIdx(x, y, z)]) { - smoothNear[gridIdx(x+1, y, z)] = 1; - smoothNear[gridIdx(x-1, y, z)] = 1; - smoothNear[gridIdx(x, y+1, z)] = 1; - smoothNear[gridIdx(x, y-1, z)] = 1; - smoothNear[gridIdx(x, y, z+1)] = 1; - smoothNear[gridIdx(x, y, z-1)] = 1; - } - } - - // ── Step 2: Generate vertices for surface cells ────────────── - // Extended range: [-1, CHUNK_SIZE) for cross-chunk connectivity. - // This chunk generates vertices for cells at [-1..CHUNK_SIZE-1]. - // The vertex map covers [-1..CHUNK_SIZE-1] → size = CHUNK_SIZE+1, offset by +1. 
- static constexpr int VERT_MIN = -1; - static constexpr int VERT_MAX = CHUNK_SIZE; // exclusive - static constexpr int VERT_RANGE = VERT_MAX - VERT_MIN; // CHUNK_SIZE + 1 = 33 - int32_t* vertexMap = scratch.vertexMap; - std::memset(vertexMap, -1, VERT_RANGE * VERT_RANGE * VERT_RANGE * sizeof(int32_t)); - - auto vertMapIdx = [](int x, int y, int z) -> int { - // shift coordinates by -VERT_MIN = +1 so index range is [0, VERT_RANGE) - return (x - VERT_MIN) + (y - VERT_MIN) * VERT_RANGE + (z - VERT_MIN) * VERT_RANGE * VERT_RANGE; - }; - - // World offset for this chunk - float ox = (float)(chunk.pos.x * CHUNK_SIZE); - float oy = (float)(chunk.pos.y * CHUNK_SIZE); - float oz = (float)(chunk.pos.z * CHUNK_SIZE); - - // Corner offsets: (dx,dy,dz) for corner index 0-7 of a cell - static const int cornerOff[8][3] = { - {0,0,0}, {1,0,0}, {0,1,0}, {1,1,0}, - {0,0,1}, {1,0,1}, {0,1,1}, {1,1,1}, - }; - static const float cornerOffF[8][3] = { - {0,0,0}, {1,0,0}, {0,1,0}, {1,1,0}, - {0,0,1}, {1,0,1}, {0,1,1}, {1,1,1}, - }; - static const int edges[12][2] = { - {0,1}, {2,3}, {4,5}, {6,7}, // X-axis edges - {0,2}, {1,3}, {4,6}, {5,7}, // Y-axis edges - {0,4}, {1,5}, {2,6}, {3,7}, // Z-axis edges - }; - - for (int z = VERT_MIN; z < VERT_MAX; z++) { - for (int y = VERT_MIN; y < VERT_MAX; y++) { - for (int x = VERT_MIN; x < VERT_MAX; x++) { - // hasSmooth check via dilated grid: at least one corner must be - // smooth or face-adjacent to smooth. Uses pre-dilated smoothNear - // grid → only 8 lookups instead of 56. 
- bool hasSmooth = false; - for (int c = 0; c < 8 && !hasSmooth; c++) { - if (smoothNear[gridIdx(x + cornerOff[c][0], y + cornerOff[c][1], z + cornerOff[c][2])]) - hasSmooth = true; - } - if (!hasSmooth) continue; - - // Get SDF at 8 corners of cell (x,y,z) - float corner[8]; - bool hasPos = false, hasNeg = false; - for (int c = 0; c < 8; c++) { - corner[c] = sdf[gridIdx(x + cornerOff[c][0], y + cornerOff[c][1], z + cornerOff[c][2])]; - if (corner[c] < 0.0f) hasNeg = true; - else hasPos = true; - } - - if (!hasPos || !hasNeg) continue; // no sign change → not on surface - - // Compute vertex position as centroid of edge crossings. - // +0.5 offset: SDF is sampled at voxel centers, so the cell spans - // from (x+0.5) to (x+1.5) in world space. This naturally aligns - // the isosurface with the integer grid (voxel face positions). - float sumX = 0, sumY = 0, sumZ = 0; - int crossCount = 0; - - for (int e = 0; e < 12; e++) { - float s0 = corner[edges[e][0]]; - float s1 = corner[edges[e][1]]; - if ((s0 < 0.0f) == (s1 < 0.0f)) continue; - - float t = s0 / (s0 - s1); - t = std::clamp(t, 0.01f, 0.99f); - - const float* c0 = cornerOffF[edges[e][0]]; - const float* c1 = cornerOffF[edges[e][1]]; - sumX += c0[0] + t * (c1[0] - c0[0]); - sumY += c0[1] + t * (c1[1] - c0[1]); - sumZ += c0[2] + t * (c1[2] - c0[2]); - crossCount++; - } - - if (crossCount == 0) continue; - - float invCross = 1.0f / (float)crossCount; - // centroid in [0,1] within the cell - float cx = sumX * invCross; - float cy = sumY * invCross; - float cz = sumZ * invCross; - - // ── Per-axis clamping at blocky boundaries ─────────── - // With +0.5 offset, the cell spans [x+0.5, x+1.5] in world space. - // The integer grid (blocky faces) is at x+1. In centroid coords, - // that's centroid = 0.5 (the midpoint of the cell). 
- // If the +side corners (dx=1) contain a blocky solid, clamp centroid ≤ 0.5 - // If the -side corners (dx=0) contain a blocky solid, clamp centroid ≥ 0.5 - // This prevents the smooth mesh from extending into blocky territory. - bool blockyXlo = false, blockyXhi = false; - bool blockyYlo = false, blockyYhi = false; - bool blockyZlo = false, blockyZhi = false; - for (int c = 0; c < 8; c++) { - if (corner[c] >= 0.0f) continue; // empty corner - VoxelData v = voxelGrid[gridIdx( - x + cornerOff[c][0], y + cornerOff[c][1], z + cornerOff[c][2])]; - if (!v.isEmpty() && !v.isSmooth()) { - // This corner is a blocky solid - if (cornerOff[c][0] == 0) blockyXlo = true; else blockyXhi = true; - if (cornerOff[c][1] == 0) blockyYlo = true; else blockyYhi = true; - if (cornerOff[c][2] == 0) blockyZlo = true; else blockyZhi = true; - } - } - if (blockyXhi) cx = std::min(cx, 0.5f); - if (blockyXlo) cx = std::max(cx, 0.5f); - if (blockyYhi) cy = std::min(cy, 0.5f); - if (blockyYlo) cy = std::max(cy, 0.5f); - if (blockyZhi) cz = std::min(cz, 0.5f); - if (blockyZlo) cz = std::max(cz, 0.5f); - - // World position with +0.5 offset (SDF at voxel centers) - float vx = (float)x + 0.5f + cx; - float vy = (float)y + 0.5f + cy; - float vz = (float)z + 0.5f + cz; - - // Determine material: prefer smooth voxels' materials to avoid - // picking up subsurface blocky materials (e.g., dirt under stone) - uint8_t smoothMatCounts[256] = {}; - uint8_t allMatCounts[256] = {}; - int smoothCount = 0; - for (int c = 0; c < 8; c++) { - if (corner[c] < 0.0f) { - VoxelData v = voxelGrid[gridIdx( - x + cornerOff[c][0], y + cornerOff[c][1], z + cornerOff[c][2])]; - if (!v.isEmpty()) { - allMatCounts[v.getMaterialID()]++; - if (v.isSmooth()) { - smoothMatCounts[v.getMaterialID()]++; - smoothCount++; - } - } - } - } - // Primary material: prefer smooth-only counts to avoid subsurface bleed - uint8_t* primaryCounts = (smoothCount > 0) ? 
smoothMatCounts : allMatCounts; - uint8_t bestMat = 6, bestCount = 0; - for (int m = 1; m < 256; m++) { - if (primaryCounts[m] > bestCount) { - bestMat = (uint8_t)m; bestCount = primaryCounts[m]; - } - } - // Secondary material: only count SURFACE-EXPOSED voxels (at least one - // empty neighbor). This prevents underground materials (dirt under stone) - // from bleeding through — same principle as blocky face blending. - static const int dirs6[6][3] = {{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}}; - uint8_t surfaceMatCounts[256] = {}; - for (int c = 0; c < 8; c++) { - if (corner[c] >= 0.0f) continue; - int cx = x + cornerOff[c][0], cy = y + cornerOff[c][1], cz = z + cornerOff[c][2]; - VoxelData v = voxelGrid[gridIdx(cx, cy, cz)]; - if (v.isEmpty()) continue; - // Check if this voxel is on the surface - bool onSurface = false; - for (int d = 0; d < 6 && !onSurface; d++) { - if (sdf[gridIdx(cx + dirs6[d][0], cy + dirs6[d][1], cz + dirs6[d][2])] > 0.0f) - onSurface = true; - } - if (onSurface) surfaceMatCounts[v.getMaterialID()]++; - } - uint8_t secMat = bestMat, secCount = 0; - for (int m = 1; m < 256; m++) { - if (m == bestMat) continue; - if (surfaceMatCounts[m] > secCount) { - secMat = (uint8_t)m; secCount = surfaceMatCounts[m]; - } - } - // blendWeight: binary flag — 255 at material boundary, 0 at interior. - // GPU interpolation creates the smooth edge-to-interior falloff. - uint8_t blendW = (secCount > 0 && secMat != bestMat) ? 
255 : 0; - - // Store vertex (normals zeroed — computed later from face normals in Step 4) - int32_t vertIdx = (int32_t)chunk.smoothVertices.size(); - vertexMap[vertMapIdx(x, y, z)] = vertIdx; - - SmoothVertex sv; - sv.px = ox + vx; - sv.py = oy + vy; - sv.pz = oz + vz; - sv.nx = 0; - sv.ny = 0; - sv.nz = 0; - sv.materialID = bestMat; - sv.secondaryMat = secMat; - sv.blendWeight = blendW; - sv._pad1 = 0; - sv.chunkIndex = 0; - sv._pad2 = 0; - chunk.smoothVertices.push_back(sv); - } - } - } - - if (chunk.smoothVertices.empty()) { - chunk.hasSmooth = false; - return 0; - } - - // ── Step 3: Emit quads for edges with sign change ──────────── - // Canonical ownership: this chunk owns edges whose lower endpoint - // is in [0, CHUNK_SIZE). Extended to check edges at the chunk - // boundary (lower endpoint at CHUNK_SIZE-1, upper at CHUNK_SIZE). - // The sharing cells may be at [-1..CHUNK_SIZE-1], all covered by vertex map. - - // Tri with edge axis info for correct normal orientation. - // normalAxis: 0=X, 1=Y, 2=Z — the axis of the edge that generated this quad. - // normalSign: +1 if the normal should point in +axis direction, -1 for -axis. - struct Tri { int32_t a, b, c; int8_t normalAxis; int8_t normalSign; }; - std::vector triangles; - triangles.reserve(chunk.smoothVertices.size() * 2); - - // Helper: safe vertex map lookup (returns -1 if out of range) - auto safeVertMap = [&](int x, int y, int z) -> int32_t { - if (x < VERT_MIN || x >= VERT_MAX || - y < VERT_MIN || y >= VERT_MAX || - z < VERT_MIN || z >= VERT_MAX) return -1; - return vertexMap[vertMapIdx(x, y, z)]; - }; - - // Helper: emit 2 triangles for a quad (a,b,c,d) with known desired normal. - // The Y-axis sharing cells have a different spatial arrangement from X and Z, - // requiring opposite winding to produce correct front-facing triangles. - auto emitQuad = [&](int a, int b, int c, int d, float s0, int8_t axis) { - if (a < 0 || b < 0 || c < 0 || d < 0) return; - int8_t sign = (s0 < 0.0f) ? 
+1 : -1; - // Y-axis has natural winding swapped relative to X and Z - bool useWindingA = (s0 > 0.0f); - if (axis == 1) useWindingA = !useWindingA; - if (useWindingA) { - triangles.push_back({a, b, d, axis, sign}); - triangles.push_back({a, d, c, axis, sign}); - } else { - triangles.push_back({a, d, b, axis, sign}); - triangles.push_back({a, c, d, axis, sign}); - } - }; - - // Iterate over edges owned by this chunk: grid points [0, CHUNK_SIZE) - for (int z = 0; z < CHUNK_SIZE; z++) { - for (int y = 0; y < CHUNK_SIZE; y++) { - for (int x = 0; x < CHUNK_SIZE; x++) { - float s0 = sdf[gridIdx(x, y, z)]; - - // X-axis edge: (x,y,z) → (x+1,y,z) - { - float s1 = sdf[gridIdx(x+1, y, z)]; - if ((s0 < 0.0f) != (s1 < 0.0f)) { - emitQuad( - safeVertMap(x, y-1, z-1), safeVertMap(x, y, z-1), - safeVertMap(x, y-1, z), safeVertMap(x, y, z), - s0, 0); - } - } - - // Y-axis edge: (x,y,z) → (x,y+1,z) - { - float s1 = sdf[gridIdx(x, y+1, z)]; - if ((s0 < 0.0f) != (s1 < 0.0f)) { - emitQuad( - safeVertMap(x-1, y, z-1), safeVertMap(x, y, z-1), - safeVertMap(x-1, y, z), safeVertMap(x, y, z), - s0, 1); - } - } - - // Z-axis edge: (x,y,z) → (x,y,z+1) - { - float s1 = sdf[gridIdx(x, y, z+1)]; - if ((s0 < 0.0f) != (s1 < 0.0f)) { - emitQuad( - safeVertMap(x-1, y-1, z), safeVertMap(x, y-1, z), - safeVertMap(x-1, y, z), safeVertMap(x, y, z), - s0, 2); - } - } - } - } - } - - // ── Step 4: Compute smooth vertex normals ────────────────────── - // Accumulate area-weighted face normals into each indexed vertex, - // then normalize. This gives Gouraud-style smooth shading across - // the Surface Nets mesh without adding geometry. - - const int vertCount = (int)chunk.smoothVertices.size(); - - // Zero out vertex normals (will accumulate face normals) - for (auto& sv : chunk.smoothVertices) { - sv.nx = 0; sv.ny = 0; sv.nz = 0; - } - - // For each triangle: compute oriented face normal, accumulate into vertices. 
- // The cross product magnitude is proportional to triangle area, so larger - // triangles contribute more — this is the standard area-weighted approach. - for (const auto& tri : triangles) { - const SmoothVertex& va = chunk.smoothVertices[tri.a]; - const SmoothVertex& vb = chunk.smoothVertices[tri.b]; - const SmoothVertex& vc = chunk.smoothVertices[tri.c]; - - float e1x = vb.px - va.px, e1y = vb.py - va.py, e1z = vb.pz - va.pz; - float e2x = vc.px - va.px, e2y = vc.py - va.py, e2z = vc.pz - va.pz; - float fnx = e1y * e2z - e1z * e2y; - float fny = e1z * e2x - e1x * e2z; - float fnz = e1x * e2y - e1y * e2x; - - // Orient using the known edge axis (same logic as before) - float component = (tri.normalAxis == 0) ? fnx : (tri.normalAxis == 1) ? fny : fnz; - if ((component > 0.0f) != (tri.normalSign > 0)) { - fnx = -fnx; fny = -fny; fnz = -fnz; - } - - // Accumulate (area-weighted — cross product magnitude IS the area×2) - chunk.smoothVertices[tri.a].nx += fnx; - chunk.smoothVertices[tri.a].ny += fny; - chunk.smoothVertices[tri.a].nz += fnz; - chunk.smoothVertices[tri.b].nx += fnx; - chunk.smoothVertices[tri.b].ny += fny; - chunk.smoothVertices[tri.b].nz += fnz; - chunk.smoothVertices[tri.c].nx += fnx; - chunk.smoothVertices[tri.c].ny += fny; - chunk.smoothVertices[tri.c].nz += fnz; - } - - // Normalize accumulated vertex normals - for (auto& sv : chunk.smoothVertices) { - float len = std::sqrt(sv.nx*sv.nx + sv.ny*sv.ny + sv.nz*sv.nz); - if (len > 0.0001f) { - sv.nx /= len; sv.ny /= len; sv.nz /= len; - } else { - sv.nx = 0; sv.ny = 1; sv.nz = 0; - } - } - - // ── Step 5: Expand indexed triangles to triangle list ───────── - std::vector expanded; - expanded.reserve(triangles.size() * 3); - for (const auto& tri : triangles) { - expanded.push_back(chunk.smoothVertices[tri.a]); - expanded.push_back(chunk.smoothVertices[tri.b]); - expanded.push_back(chunk.smoothVertices[tri.c]); - } - - chunk.smoothVertices = std::move(expanded); - chunk.smoothVertexCount = 
(uint32_t)chunk.smoothVertices.size(); - - return chunk.smoothVertexCount; -} +// The CPU SmoothMesher has been removed. Smooth meshing is now handled +// exclusively by the GPU compute shaders (voxelSmoothCentroidCS.hlsl +// + voxelSmoothCS.hlsl) which include crease-angle correction for +// correct normals at sharp edges (e.g. vertical walls). } // namespace voxel diff --git a/src/voxel/VoxelMesher.h b/src/voxel/VoxelMesher.h index b0322a9..49f329f 100644 --- a/src/voxel/VoxelMesher.h +++ b/src/voxel/VoxelMesher.h @@ -37,25 +37,4 @@ private: int x, int y, int z, uint8_t face); }; -// ── Naive Surface Nets Mesher (Phase 5) ───────────────────────── -// Generates smooth triangle mesh for voxels marked FLAG_SMOOTH. -// Algorithm: one vertex per surface cell, positioned at edge-crossing centroid. -// Quads emitted for each edge with sign change, then split into 2 triangles. -class SmoothMesher { -public: - // Mesh smooth voxels in a chunk, populating chunk.smoothVertices. - // Returns number of smooth vertices generated (always multiple of 3, triangle list). 
- static uint32_t meshChunk(Chunk& chunk, const VoxelWorld& world); - -private: - // SDF value at a voxel position (solid smooth = -1, empty = +1) - // Non-smooth solid voxels are treated as walls (SDF = -1 at boundary) - static float computeSDF(const Chunk& chunk, const VoxelWorld& world, - int x, int y, int z); - - // Compute SDF gradient (numerical central differences) for normal - static void computeNormal(const Chunk& chunk, const VoxelWorld& world, - int x, int y, int z, float& nx, float& ny, float& nz); -}; - } // namespace voxel diff --git a/src/voxel/VoxelRenderer.cpp b/src/voxel/VoxelRenderer.cpp index 1ad7a69..678fb1f 100644 --- a/src/voxel/VoxelRenderer.cpp +++ b/src/voxel/VoxelRenderer.cpp @@ -1108,84 +1108,7 @@ void VoxelRenderer::renderTopings( dev->RenderPassEnd(cmd); } -// ── Phase 5: Smooth Surface Nets upload + rendering ───────────── - -void VoxelRenderer::uploadSmoothData(VoxelWorld& world) { - if (!device_ || !smoothPso_.IsValid()) return; - - // Collect all smooth vertices from all chunks, stamping each with its chunkIndex. - // The chunkIndex must match the order in chunkInfoBuffer_ (assigned by forEachChunk). - // Reuse a persistent staging vector to avoid per-frame allocations. - smoothStagingVerts_.clear(); - if (smoothStagingVerts_.capacity() < 64 * 1024) - smoothStagingVerts_.reserve(64 * 1024); - - uint32_t chunkIdx = 0; - world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) { - if (chunk.hasSmooth && chunk.smoothVertexCount > 0) { - for (auto& sv : chunk.smoothVertices) { - sv.chunkIndex = (uint16_t)chunkIdx; - } - smoothStagingVerts_.insert(smoothStagingVerts_.end(), - chunk.smoothVertices.begin(), - chunk.smoothVertices.end()); - } - chunkIdx++; - }); - - smoothVertexCount_ = (uint32_t)std::min(smoothStagingVerts_.size(), (size_t)MAX_SMOOTH_VERTICES); - - if (smoothVertexCount_ == 0) { - smoothDirty_ = false; - return; - } - - // Pre-allocate smooth buffer; only recreate when capacity needs to grow. 
- if (smoothVertexBuf_.ensureCapacity(device_, smoothVertexCount_, sizeof(SmoothVertex), - BindFlag::SHADER_RESOURCE)) { - wi::backlog::post("Smooth: allocated vertex buffer (" + std::to_string(smoothVertexBuf_.capacity) - + " capacity, " + std::to_string(smoothVertexBuf_.capacity * sizeof(SmoothVertex) / 1024) + " KB)"); - } else { - smoothVertexBuf_.markDirty(); // deferred upload in Render() - } - - smoothDirty_ = false; -} - -void VoxelRenderer::uploadSmoothDataFast(VoxelWorld& world) { - if (!device_ || !smoothPso_.IsValid()) return; - - // Fast path: chunkIndex already stamped during parallel meshChunk. - // Just collect vertices (no per-vertex stamping needed). - smoothStagingVerts_.clear(); - if (smoothStagingVerts_.capacity() < 64 * 1024) - smoothStagingVerts_.reserve(64 * 1024); - - world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) { - if (chunk.hasSmooth && chunk.smoothVertexCount > 0) { - smoothStagingVerts_.insert(smoothStagingVerts_.end(), - chunk.smoothVertices.begin(), - chunk.smoothVertices.end()); - } - }); - - smoothVertexCount_ = (uint32_t)std::min(smoothStagingVerts_.size(), (size_t)MAX_SMOOTH_VERTICES); - - if (smoothVertexCount_ == 0) { - smoothDirty_ = false; - return; - } - - // Pre-allocate smooth buffer; only recreate when capacity needs to grow. 
- if (smoothVertexBuf_.ensureCapacity(device_, smoothVertexCount_, sizeof(SmoothVertex), - BindFlag::SHADER_RESOURCE)) { - // Buffer recreated with 25% headroom - } else { - smoothVertexBuf_.markDirty(); // deferred upload in Render() - } - - smoothDirty_ = false; -} +// ── Phase 5: Smooth Surface Nets rendering (GPU compute only) ─── void VoxelRenderer::renderSmooth( CommandList cmd, @@ -1193,10 +1116,9 @@ void VoxelRenderer::renderSmooth( const Texture& renderTarget, const Texture& normalTarget ) const { - // Use GPU-generated smooth buffer if available, otherwise CPU buffer - const bool useGpuSmooth = smoothCentroidShader_.IsValid() && smoothMeshShader_.IsValid(); - const auto& smoothBuf = useGpuSmooth ? gpuSmoothVertexBuffer_ : smoothVertexBuf_.gpu; - uint32_t vertCount = useGpuSmooth ? gpuSmoothVertexCount_ : smoothVertexCount_; + // GPU compute smooth buffer only (CPU fallback removed) + const auto& smoothBuf = gpuSmoothVertexBuffer_; + uint32_t vertCount = gpuSmoothVertexCount_; if (!smoothPso_.IsValid() || !smoothBuf.IsValid() || vertCount == 0) return; @@ -1306,41 +1228,10 @@ void VoxelRenderPath::Start() { wi::backlog::post(msg); } - // Phase 5: Smooth surface mesh — GPU path or CPU fallback + // Phase 5: Smooth surface mesh — GPU compute only, dispatched in first Render() if (renderer.isInitialized()) { - if (renderer.smoothCentroidShader_.IsValid() && renderer.smoothMeshShader_.IsValid()) { - // GPU smooth mesher available — will dispatch in first Render() - renderer.gpuSmoothMeshDirty_ = true; - wi::backlog::post("SmoothMesher: GPU path active, dispatch deferred to Render()"); - } else { - // CPU fallback: Surface Nets mesh for smooth voxels (parallelized) - std::vector chunkPtrs; - world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) { - chunkPtrs.push_back(&chunk); - }); - const VoxelWorld& worldRef = world; - wi::jobsystem::context smoothCtx; - wi::jobsystem::Dispatch(smoothCtx, (uint32_t)chunkPtrs.size(), 1, - [&chunkPtrs, 
&worldRef](wi::jobsystem::JobArgs args) { - SmoothMesher::meshChunk(*chunkPtrs[args.jobIndex], worldRef); - }); - wi::jobsystem::Wait(smoothCtx); - - uint32_t totalSmooth = 0; - uint32_t smoothChunks = 0; - for (auto* c : chunkPtrs) { - if (c->smoothVertexCount > 0) { - totalSmooth += c->smoothVertexCount; - smoothChunks++; - } - } - renderer.uploadSmoothData(world); - char msg[256]; - snprintf(msg, sizeof(msg), - "SmoothMesher: %u vertices (%u tris) in %u chunks", - totalSmooth, totalSmooth / 3, smoothChunks); - wi::backlog::post(msg); - } + renderer.gpuSmoothMeshDirty_ = true; + wi::backlog::post("SmoothMesher: GPU path active, dispatch deferred to Render()"); } worldGenerated_ = true; @@ -1584,31 +1475,8 @@ void VoxelRenderPath::Update(float dt) { renderer.gpuMeshDirty_ = true; renderer.rt_.aoHistoryValid = false; - // Re-mesh smooth surfaces — GPU path or CPU fallback - if (renderer.smoothCentroidShader_.IsValid() && renderer.smoothMeshShader_.IsValid()) { - renderer.gpuSmoothMeshDirty_ = true; - } else { - auto ts0 = std::chrono::high_resolution_clock::now(); - std::vector chunkPtrs; - world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) { - chunkPtrs.push_back(&chunk); - }); - const VoxelWorld& worldRef = world; - wi::jobsystem::context ctx; - wi::jobsystem::Dispatch(ctx, (uint32_t)chunkPtrs.size(), 1, - [&chunkPtrs, &worldRef](wi::jobsystem::JobArgs args) { - uint32_t idx = args.jobIndex; - SmoothMesher::meshChunk(*chunkPtrs[idx], worldRef); - for (auto& sv : chunkPtrs[idx]->smoothVertices) - sv.chunkIndex = (uint16_t)idx; - }); - wi::jobsystem::Wait(ctx); - auto ts1 = std::chrono::high_resolution_clock::now(); - prof_.smoothMesh.add(std::chrono::duration(ts1 - ts0).count()); - renderer.uploadSmoothDataFast(world); - auto ts2 = std::chrono::high_resolution_clock::now(); - prof_.smoothUpload.add(std::chrono::duration(ts2 - ts1).count()); - } + // Re-mesh smooth surfaces — GPU compute only + renderer.gpuSmoothMeshDirty_ = true; // Re-collect toping 
instances — parallelized { @@ -1710,8 +1578,6 @@ void VoxelRenderPath::Render() const { // topingInstanceBuf_ must be filled before dispatchTopingBLASExtract reads it (t5) renderer.topingInstanceBuf_.upload(device, cmd, renderer.topingGpuInsts_.data(), (uint32_t)renderer.topingGpuInsts_.size()); - renderer.smoothVertexBuf_.upload(device, cmd, - renderer.smoothStagingVerts_.data(), renderer.smoothVertexCount_); // ── GPU compute toping BLAS extraction ── // Skip during animation (toping BLAS is skipped to save ~130ms GPU) @@ -1746,8 +1612,7 @@ void VoxelRenderPath::Render() const { rt.dispatchBLASExtract(cmd, renderer.gpuQuadBuffer_, renderer.chunkInfoBuffer_, renderer.gpuMeshQuadCount_); - bool useGpuSmooth = renderer.smoothCentroidShader_.IsValid() && renderer.smoothMeshShader_.IsValid(); - const auto& smoothVB = useGpuSmooth ? renderer.gpuSmoothVertexBuffer_ : renderer.smoothVertexBuf_.gpu; + const auto& smoothVB = renderer.gpuSmoothVertexBuffer_; if (anim_.terrainAnimated) { uint32_t flags = (rtBuildSkipCounter_ & 1) diff --git a/src/voxel/VoxelRenderer.h b/src/voxel/VoxelRenderer.h index f965251..7231593 100644 --- a/src/voxel/VoxelRenderer.h +++ b/src/voxel/VoxelRenderer.h @@ -115,12 +115,7 @@ private: wi::graphics::Shader smoothPS_; wi::graphics::RasterizerState smoothRasterizer_; wi::graphics::PipelineState smoothPso_; - DeferredGPUBuffer smoothVertexBuf_; // StructuredBuffer, SRV t6 - std::vector smoothStagingVerts_; // persistent staging buffer (avoids per-frame alloc) - static constexpr uint32_t MAX_SMOOTH_VERTICES = 4 * 1024 * 1024; // 4M vertices max - mutable uint32_t smoothVertexCount_ = 0; mutable uint32_t smoothDrawCalls_ = 0; - bool smoothDirty_ = true; // Texture arrays for materials (512x512, 6 layers each) wi::graphics::Texture textureArray_; // RGBA: RGB=albedo, A=heightmap (t1) @@ -262,16 +257,14 @@ public: ) const; uint32_t getTopingDrawCalls() const { return topingDrawCalls_; } - // Phase 5: Smooth surface rendering - void 
uploadSmoothData(VoxelWorld& world); - void uploadSmoothDataFast(VoxelWorld& world); // chunkIndex already stamped + // Phase 5: Smooth surface rendering (GPU compute only) void renderSmooth( wi::graphics::CommandList cmd, const wi::graphics::Texture& depthBuffer, const wi::graphics::Texture& renderTarget, const wi::graphics::Texture& normalTarget ) const; - uint32_t getSmoothVertexCount() const { return (smoothCentroidShader_.IsValid() && smoothMeshShader_.IsValid()) ? gpuSmoothVertexCount_ : smoothVertexCount_; } + uint32_t getSmoothVertexCount() const { return gpuSmoothVertexCount_; } uint32_t getSmoothDrawCalls() const { return smoothDrawCalls_; } // Phase 6: Ray Tracing (delegated to VoxelRTManager) @@ -333,8 +326,8 @@ struct VoxelProfiler { ProfileAccum updateMeshes; // updateMeshes (rebuildChunkInfoOnly) ProfileAccum topingCollect; // topingSystem.collectInstances ProfileAccum topingUpload; // uploadTopingData - ProfileAccum smoothMesh; // SmoothMesher::meshChunk (all chunks) - ProfileAccum smoothUpload; // uploadSmoothData + ProfileAccum smoothMesh; // (legacy, unused — GPU smooth only) + ProfileAccum smoothUpload; // (legacy, unused — GPU smooth only) ProfileAccum frame; // full frame (Update only - legacy) // Render() phase diff --git a/src/voxel/VoxelWorld.h b/src/voxel/VoxelWorld.h index 6cebecb..f3a0ab8 100644 --- a/src/voxel/VoxelWorld.h +++ b/src/voxel/VoxelWorld.h @@ -19,10 +19,7 @@ struct Chunk { uint32_t faceOffsets[6] = {}; // offset (in quads) for each face group within quads[] uint32_t faceCounts[6] = {}; // number of quads per face group - // Smooth mesh data (output of Surface Nets mesher, Phase 5) - std::vector smoothVertices; - uint32_t smoothVertexCount = 0; - bool hasSmooth = false; // true if chunk has smooth mesh output (set by mesher) + // Smooth voxel flags (used by GPU smooth mesher to decide which chunks to dispatch) bool containsSmooth = false; // true if chunk contains any FLAG_SMOOTH voxels (set during generation) // Cached 
surface material per column (set during initial generation, reused during animation)