diff --git a/CLAUDE.md b/CLAUDE.md index 3a336ad..9c43f71 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -593,21 +593,22 @@ Système de biseaux décoratifs (« topings ») sur les faces +Y exposées pour - **Intégré dans `voxelShadowCS.hlsl`** : 8 rayons hémisphère cosine-weighted par pixel + 1 rayon soleil - **Distance-weighted AO** : `(1 - hitT/aoRadius)²` — falloff quadratique, valeurs continues au lieu de binaire hit/miss -- **World-space hash stable** : seed = `floor(worldPos - N*0.5)` (voxel solide derrière la surface) + `frac(dot(worldPos, T/B)) * 256` (position fractionnaire sur les axes tangents uniquement — l'axe normal est exclu car il oscille à cause de la précision du depth buffer) +- **Interleaved Gradient Noise (IGN)** : remplace le hash world-space pour le sampling. Bruit structuré screen-space avec excellentes propriétés spectrales (Jorge Jimenez, 2014) +- **Cranley-Patterson rotation** : `frac(IGN + frameIndex * φ)` — chaque frame explore de nouvelles directions de rayons. Le pas utilisé est le conjugué du nombre d'or (1/φ = φ − 1 ≈ 0.618, même partie fractionnaire que φ ≈ 1.618), ce qui assure une couverture maximale de l'hémisphère au fil des frames +- **Accumulation temporelle** : `lerp(history, current, 0.05)` ≈ accumulation de ~20 frames + - `aoHistoryTexture_` (R8_UNORM) persiste entre frames + - `prevViewProjection` dans VoxelCB pour reprojection worldPos → UV du frame précédent + - Rejet si UV hors écran (disocclusion basique) + - `frameIndex` + `historyValid` dans les push constants + - Copy `aoRaw → aoHistory` entre le shadow CS et le blur (capture le signal pré-blur) - **Bilateral blur séparable** (`voxelAOBlurCS.hlsl`) : 2 passes H+V, rayon 6 (kernel 13×13), edge-stopping sur depth + normals -- **Pipeline 4 passes** : - 1. Shadow CS : shadow in-place sur `voxelRT_` + AO brut → `aoRawTexture_` (R8_UNORM, u1) - 2. Blur H : `aoRawTexture_` → `aoBlurredTexture_` (bilateral, depth/normal edge-stopping) - 3. Blur V : `aoBlurredTexture_` → `aoRawTexture_` (idem, direction verticale) - 4. 
Apply : `aoRawTexture_` × `voxelRT_` → modulation finale (ou debug AO grayscale si debugMode=2) -- **Frisvad orthonormal basis** : construction robuste de (T,B) depuis N pour le hemisphere sampling -- **Cosine-weighted hemisphere** : `sqrt(u1)` distribution pour importance sampling -- **Push constants** : width, height, normalBias, shadowMaxDist, debugMode, aoRadius, aoRayCount, aoStrength -- **Pièges résolus** : - - **Hash screen-space → suit la caméra** : résolu en utilisant uniquement des coordonnées world-space - - **Hash `asuint(worldPos)` → clignote** : trop sensible aux variations FP du depth buffer, résolu par quantification au voxel + tangent frac - - **Hash `frac(worldPos)` sur axe normal → clignote sur ~30% des faces** : l'axe normal est à une frontière entière (ex: face +Y à y=5.0000) où `frac()` oscille entre ~0 et ~1. Résolu en projetant sur T/B uniquement - - **`floor(worldPos + 0.5)` → artefact au milieu des faces** : la coordonnée traverse 0.5 au centre de la face. Résolu par offset `-N*0.5` pour atterrir dans le voxel solide +- **Pipeline 5 passes** : + 1. Shadow CS : shadow in-place sur `voxelRT_` + AO temporellement accumulé → `aoRawTexture_` (u1) + 2. Copy : `aoRawTexture_` → `aoHistoryTexture_` (pour le frame suivant, pré-blur) + 3. Blur H : `aoRawTexture_` → `aoBlurredTexture_` (bilateral, depth/normal edge-stopping) + 4. Blur V : `aoBlurredTexture_` → `aoRawTexture_` (idem, direction verticale) + 5. 
Apply : `aoRawTexture_` × `voxelRT_` → modulation finale (ou debug AO grayscale si debugMode=2) +- **Push constants** : width, height, normalBias, shadowMaxDist, debugMode, aoRadius, aoRayCount, aoStrength, frameIndex, historyValid #### Phase 6.4 - Fallback [A FAIRE] diff --git a/shaders/voxelCommon.hlsli b/shaders/voxelCommon.hlsli index a71e633..42cda66 100644 --- a/shaders/voxelCommon.hlsli +++ b/shaders/voxelCommon.hlsli @@ -39,6 +39,7 @@ cbuffer VoxelCB : register(b0) { float4x4 viewProjection; float4x4 inverseViewProjection; // for depth-to-world reconstruction (RT shadows) + float4x4 prevViewProjection; // previous frame's VP for temporal reprojection (RT AO) float4 cameraPosition; float4 sunDirection; float4 sunColor; diff --git a/shaders/voxelShadowCS.hlsl b/shaders/voxelShadowCS.hlsl index 2ed8cb9..64e47c4 100644 --- a/shaders/voxelShadowCS.hlsl +++ b/shaders/voxelShadowCS.hlsl @@ -1,5 +1,6 @@ // BVLE Voxels - RT Shadow + AO Compute Shader (Phase 6.2 + 6.3) // Per-pixel: traces 1 shadow ray toward sun + N hemisphere rays for AO. +// Temporal accumulation: blends current AO with reprojected history. // Modulates voxelRT_ in-place via RWTexture2D. #include "voxelCommon.hlsli" @@ -8,6 +9,7 @@ Texture2D depthTexture : register(t0); // voxelDepth_ (D32_FLOAT as R32_FLOAT SRV) Texture2D normalTexture : register(t1); // voxelNormalRT_ (R16G16B16A16_SNORM) RaytracingAccelerationStructure tlas : register(t2); // TLAS with blocky + smooth instances +Texture2D aoHistory : register(t3); // previous frame's AO (temporally accumulated) // UAV outputs RWTexture2D colorOutput : register(u0); // voxelRT_ (shadow applied in-place) @@ -23,19 +25,23 @@ struct ShadowPush { float aoRadius; // max distance for AO rays (e.g. 8.0 voxels) uint aoRayCount; // number of hemisphere rays (e.g. 6) float aoStrength; // how dark full occlusion is (e.g. 
0.35 = 65% darkening) - uint pad[4]; + uint frameIndex; // for temporal rotation of noise pattern + uint historyValid; // 0 = no history (first frame), 1 = blend with history + uint pad[2]; }; [[vk::push_constant]] ConstantBuffer push : register(b999); -// ── Hash-based pseudo-random for AO ray directions ────────────── -// Golden ratio hash: deterministic, no texture lookup, good distribution -float hashF(uint x) { - x ^= x >> 16; - x *= 0x45d9f3bu; - x ^= x >> 16; - return float(x & 0xFFFFFF) / float(0xFFFFFF); +// ── Interleaved Gradient Noise (Jorge Jimenez, 2014) ──────────── +// Screen-space low-frequency noise with excellent spectral properties. +// Combined with Cranley-Patterson rotation per frame for temporal variation. +float interleavedGradientNoise(float2 pixelCoord) { + return frac(52.9829189 * frac(dot(pixelCoord, float2(0.06711056, 0.00583715)))); } +// Golden ratio conjugate (1/phi = phi - 1 ~ 0.618; same fractional part as phi ~ 1.618), used as the Cranley-Patterson rotation step +static const float GOLDEN_RATIO = 0.618033988749895; + +// ── Hash (kept for voxel-coord seed) ──────────────────────────── uint hashU(uint a, uint b) { a ^= b * 0x9E3779B9u; a ^= a >> 16; @@ -43,6 +49,13 @@ uint hashU(uint a, uint b) { return a; } +float hashF(uint x) { + x ^= x >> 16; + x *= 0x45d9f3bu; + x ^= x >> 16; + return float(x & 0xFFFFFF) / float(0xFFFFFF); +} + // Build orthonormal basis from normal (Frisvad's method, robust for all N) void buildBasis(float3 N, out float3 T, out float3 B) { if (N.z < -0.9999) { @@ -73,6 +86,7 @@ void main(uint3 DTid : SV_DispatchThreadID) { float depth = depthTexture[DTid.xy]; if (depth == 0.0) { + aoOutput[DTid.xy] = 1.0; if (push.debugMode > 0) colorOutput[DTid.xy] = float4(0.1, 0.1, 0.1, 1); return; } @@ -110,41 +124,38 @@ void main(uint3 DTid : SV_DispatchThreadID) { } } - // ── AO: hemisphere rays ──────────────────────────────────── + // ── AO: hemisphere rays with IGN + temporal rotation ────── float aoFactor = 1.0; uint rayCount = push.aoRayCount; if (rayCount > 0) { float3 T, B; buildBasis(N, T, B); - 
// Fully world-space seed: solid voxel coord + tangent-plane frac position - // vc: offset by -N*0.5 → inside the solid voxel (stable per face) - // Sub-voxel: only use the 2 tangent axes (T,B), NOT the normal axis. - // The normal axis sits at an integer boundary (e.g. face +Y → y=5.0000) - // where frac() oscillates between ~0 and ~1 due to depth precision → flicker. - // Tangent axes vary smoothly across the face → always stable. - int3 vc = int3(floor(worldPos - N * 0.5)); - float tFrac = frac(dot(worldPos, T)); - float bFrac = frac(dot(worldPos, B)); - uint st = uint(tFrac * 256.0); - uint sb = uint(bFrac * 256.0); - uint baseSeed = hashU(hashU((uint)(vc.x + 32768), (uint)(vc.y + 32768)), (uint)(vc.z + 32768)); - uint pixelSeed = hashU(baseSeed, hashU(st, sb)); + // IGN base noise: well-distributed screen-space pattern + // Cranley-Patterson rotation: offset by golden ratio * frameIndex + // Each frame explores different ray directions → temporal accumulation converges + float frameRotation = float(push.frameIndex) * GOLDEN_RATIO; + float totalOcclusion = 0.0; [loop] for (uint i = 0; i < rayCount; i++) { - // Per-ray random: hash(pixelSeed, rayIndex) - uint seed = hashU(pixelSeed, i); - float u1 = hashF(seed); - float u2 = hashF(seed ^ 0xA5A5A5A5u); + // Per-ray IGN with spatial offset to decorrelate rays + // Each ray uses a different pixel offset → different IGN value + float2 rayPixel = float2(DTid.xy) + float2(i * 7.0, i * 3.0); + float ign = interleavedGradientNoise(rayPixel); + + // Cranley-Patterson rotation: shift by golden ratio per frame + per ray + float u1 = frac(ign + frameRotation + float(i) * GOLDEN_RATIO); + float u2 = frac(interleavedGradientNoise(rayPixel + float2(47.0, 17.0)) + + frameRotation + float(i) * 0.381966011250105); // 1/φ² float3 dir = cosineSampleHemisphere(u1, u2, N, T, B); RayDesc aoRay; aoRay.Origin = origin; aoRay.Direction = dir; - aoRay.TMin = 0.05; // larger TMin for AO to avoid edge self-intersection + aoRay.TMin = 0.05; 
aoRay.TMax = push.aoRadius; RayQuery aoQ; @@ -152,10 +163,9 @@ void main(uint3 DTid : SV_DispatchThreadID) { [loop] while (aoQ.Proceed()) {} if (aoQ.CommittedStatus() == COMMITTED_TRIANGLE_HIT) { - // Distance-weighted: close hits = strong occlusion, far hits = weak float hitT = aoQ.CommittedRayT(); float falloff = 1.0 - saturate(hitT / push.aoRadius); - totalOcclusion += falloff * falloff; // quadratic for natural falloff + totalOcclusion += falloff * falloff; } } @@ -163,11 +173,28 @@ void main(uint3 DTid : SV_DispatchThreadID) { aoFactor = 1.0 - occlusionRatio * push.aoStrength; } + // ── Temporal accumulation ──────────────────────────────────── + // Reproject current pixel to previous frame's screen space + if (push.historyValid != 0) { + float4 prevClip = mul(prevViewProjection, float4(worldPos, 1.0)); + float2 prevNDC = prevClip.xy / prevClip.w; + float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 0.5 - prevNDC.y * 0.5); + + // Check if reprojected UV is within screen bounds + if (prevUV.x >= 0.0 && prevUV.x < 1.0 && prevUV.y >= 0.0 && prevUV.y < 1.0) { + int2 prevPixel = int2(prevUV * float2(push.width, push.height)); + float historyAO = aoHistory.Load(int3(prevPixel, 0)); + + // Blend: low alpha = keep more history (smoother), high alpha = more responsive + float blendAlpha = 0.05; // accumulate ~20 frames + aoFactor = lerp(historyAO, aoFactor, blendAlpha); + } + } + // ── Write AO to separate buffer (will be blurred), apply shadow in-place ── aoOutput[DTid.xy] = aoFactor; if (push.debugMode == 1) { - // Debug shadows: red=shadow, green=lit, blue=backface if (NdotL <= 0.0) colorOutput[DTid.xy] = float4(0, 0, 0.5, 1); else if (shadowFactor < 1.0) @@ -175,11 +202,8 @@ void main(uint3 DTid : SV_DispatchThreadID) { else colorOutput[DTid.xy] = float4(0, 1, 0, 1); } else if (push.debugMode == 2) { - // Debug AO: raw AO written to aoOutput, will be visualized after blur - // Write white to color so blur apply pass shows AO only colorOutput[DTid.xy] = float4(1, 1, 1, 
1); } else { - // Apply shadow only — AO applied after blur in a separate pass float4 color = colorOutput[DTid.xy]; color.rgb *= shadowFactor; colorOutput[DTid.xy] = color; diff --git a/src/voxel/VoxelRenderer.cpp b/src/voxel/VoxelRenderer.cpp index ee1cf22..d33a654 100644 --- a/src/voxel/VoxelRenderer.cpp +++ b/src/voxel/VoxelRenderer.cpp @@ -1217,7 +1217,7 @@ void VoxelRenderer::dispatchShadows(CommandList cmd, uint32_t gx = (w + 7) / 8; uint32_t gy = (h + 7) / 8; - // ── Pass 1: Shadow + raw AO ──────────────────────────────────── + // ── Pass 1: Shadow + raw AO (with temporal accumulation) ─────── { GPUBarrier preBarriers[] = { GPUBarrier::Image(&const_cast(depthBuffer), @@ -1230,11 +1230,12 @@ void VoxelRenderer::dispatchShadows(CommandList cmd, dev->Barrier(preBarriers, 3, cmd); dev->BindComputeShader(&shadowShader_, cmd); - dev->BindResource(&depthBuffer, 0, cmd); // t0 = depth - dev->BindResource(&normalTarget, 1, cmd); // t1 = normals - dev->BindResource(&tlas_, 2, cmd); // t2 = TLAS - dev->BindUAV(&renderTarget, 0, cmd); // u0 = color - dev->BindUAV(&aoRawTexture_, 1, cmd); // u1 = raw AO output + dev->BindResource(&depthBuffer, 0, cmd); // t0 = depth + dev->BindResource(&normalTarget, 1, cmd); // t1 = normals + dev->BindResource(&tlas_, 2, cmd); // t2 = TLAS + dev->BindResource(&aoHistoryTexture_, 3, cmd); // t3 = AO history (prev frame) + dev->BindUAV(&renderTarget, 0, cmd); // u0 = color + dev->BindUAV(&aoRawTexture_, 1, cmd); // u1 = raw AO output dev->BindConstantBuffer(&constantBuffer_, 0, cmd); struct ShadowPush { @@ -1244,7 +1245,9 @@ void VoxelRenderer::dispatchShadows(CommandList cmd, float aoRadius; uint32_t aoRayCount; float aoStrength; - uint32_t pad[4]; + uint32_t frameIndex; + uint32_t historyValid; + uint32_t pad[2]; } pushData = {}; pushData.width = w; pushData.height = h; @@ -1254,19 +1257,41 @@ void VoxelRenderer::dispatchShadows(CommandList cmd, pushData.aoRadius = 8.0f; pushData.aoRayCount = 8; pushData.aoStrength = 0.7f; + 
pushData.frameIndex = frameCounter_++; + pushData.historyValid = aoHistoryValid_ ? 1u : 0u; dev->PushConstants(&pushData, sizeof(pushData), cmd); dev->Dispatch(gx, gy, 1, cmd); } + // ── Pass 1.5: Copy raw AO → history (before blur, for next frame) ── + { + GPUBarrier copyBarriers[] = { + GPUBarrier::Image(&aoRawTexture_, + ResourceState::UNORDERED_ACCESS, ResourceState::COPY_SRC), + GPUBarrier::Image(&aoHistoryTexture_, + ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST), + }; + dev->Barrier(copyBarriers, 2, cmd); + dev->CopyResource(&aoHistoryTexture_, &aoRawTexture_, cmd); + + GPUBarrier postCopyBarriers[] = { + GPUBarrier::Image(&aoRawTexture_, + ResourceState::COPY_SRC, ResourceState::SHADER_RESOURCE), + GPUBarrier::Image(&aoHistoryTexture_, + ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE), + }; + dev->Barrier(postCopyBarriers, 2, cmd); + aoHistoryValid_ = true; + } + // ── Pass 2: Bilateral blur horizontal (aoRaw → aoBlurred) ────── { + // aoRawTexture_ already in SHADER_RESOURCE from copy pass GPUBarrier barriers[] = { - GPUBarrier::Image(&aoRawTexture_, - ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), GPUBarrier::Image(&aoBlurredTexture_, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS), }; - dev->Barrier(barriers, 2, cmd); + dev->Barrier(barriers, 1, cmd); dev->BindComputeShader(&aoBlurShader_, cmd); dev->BindResource(&aoRawTexture_, 0, cmd); // t0 = AO input @@ -1413,6 +1438,7 @@ void VoxelRenderer::render( XMStoreFloat4x4(&cb.viewProjection, vpMatrix); XMMATRIX invVP = XMMatrixInverse(nullptr, vpMatrix); XMStoreFloat4x4(&cb.inverseViewProjection, invVP); + cb.prevViewProjection = prevViewProjection_; // from last frame cb.cameraPosition = XMFLOAT4(camera.Eye.x, camera.Eye.y, camera.Eye.z, 1.0f); cb.sunDirection = XMFLOAT4(-0.7f, -0.4f, -0.3f, 0.0f); // lower sun = longer cast shadows cb.sunColor = XMFLOAT4(1.2f, 1.1f, 0.9f, 1.0f); @@ -1421,14 +1447,12 @@ void VoxelRenderer::render( cb.blendEnabled = 
1.0f; // Phase 3: PS-based blending enabled in GPU mesh path cb.debugBlend = debugBlend_ ? 1.0f : 0.0f; cb.chunkCount = chunkCount_; - // Per-material blend flags (bit N = material N): - // canBleed: material can overflow visually onto adjacent voxels - // resistBleed: adjacent materials cannot overflow onto this material - // Material IDs: 1=Grass, 2=Dirt, 3=Stone, 4=Sand, 5=Snow, 6=SmoothStone - cb.bleedMask = (1u << 1) | (1u << 2) | (1u << 4) | (1u << 5); // Grass, Dirt, Sand, Snow can bleed (NOT Stone/SmoothStone) - cb.resistBleedMask = (1u << 1); // Grass resists bleed (she bleeds onto others, not the reverse) + cb.bleedMask = (1u << 1) | (1u << 2) | (1u << 4) | (1u << 5); + cb.resistBleedMask = (1u << 1); cb.windTime = windTime_; dev->UpdateBuffer(&constantBuffer_, &cb, cmd, sizeof(cb)); + // Save current VP for next frame's temporal reprojection + XMStoreFloat4x4(&prevViewProjection_, vpMatrix); // Render pass (MRT: color + normals + depth) RenderPassImage rp[] = { @@ -2365,6 +2389,8 @@ void VoxelRenderPath::createRenderTargets() { aoDesc.layout = wi::graphics::ResourceState::SHADER_RESOURCE; device->CreateTexture(&aoDesc, nullptr, &renderer.aoRawTexture_); device->CreateTexture(&aoDesc, nullptr, &renderer.aoBlurredTexture_); + device->CreateTexture(&aoDesc, nullptr, &renderer.aoHistoryTexture_); + renderer.aoHistoryValid_ = false; // no history on first frame rtCreated_ = voxelRT_.IsValid() && voxelNormalRT_.IsValid() && voxelDepth_.IsValid() && renderer.aoRawTexture_.IsValid() && renderer.aoBlurredTexture_.IsValid(); diff --git a/src/voxel/VoxelRenderer.h b/src/voxel/VoxelRenderer.h index 3a681f3..e47a8bc 100644 --- a/src/voxel/VoxelRenderer.h +++ b/src/voxel/VoxelRenderer.h @@ -148,6 +148,7 @@ private: struct VoxelConstants { XMFLOAT4X4 viewProjection; XMFLOAT4X4 inverseViewProjection; // for depth-to-world reconstruction (RT shadows) + XMFLOAT4X4 prevViewProjection; // previous frame's VP for temporal reprojection (RT AO) XMFLOAT4 cameraPosition; 
XMFLOAT4 sunDirection; XMFLOAT4 sunColor; @@ -212,6 +213,10 @@ private: wi::graphics::Shader aoApplyShader_; // voxelAOApplyCS compute shader mutable wi::graphics::Texture aoRawTexture_; // R8_UNORM: raw AO from shadow CS mutable wi::graphics::Texture aoBlurredTexture_; // R8_UNORM: after bilateral blur + mutable wi::graphics::Texture aoHistoryTexture_; // R8_UNORM: previous frame's temporally accumulated AO + mutable XMFLOAT4X4 prevViewProjection_; // previous frame's VP matrix + mutable uint32_t frameCounter_ = 0; + mutable bool aoHistoryValid_ = false; mutable bool rtShadowsEnabled_ = false; // true when shader + TLAS ready mutable uint32_t rtShadowDebug_ = 0; // 0=off, 1=debug shadows, 2=debug AO