// BVLE Voxels - RT Shadow + AO Compute Shader (Phase 6.2 + 6.3)
// Per-pixel: traces jittered shadow rays toward the sun + N hemisphere rays for AO.
// Temporal accumulation: blends current AO with reprojected history.
// Modulates voxelRT_ in-place via RWTexture2D.

#include "voxelCommon.hlsli"

// SRV bindings
// NOTE(review): element types below are inferred from the format comments and
// from how main() reads each resource — confirm against the CPU-side views.
Texture2D<float>  depthTexture  : register(t0); // voxelDepth_ (D32_FLOAT as R32_FLOAT SRV)
Texture2D<float4> normalTexture : register(t1); // voxelNormalRT_ (R16G16B16A16_SNORM)
RaytracingAccelerationStructure tlas : register(t2); // TLAS with blocky + smooth instances
Texture2D<float>  aoHistory     : register(t3); // previous frame's AO (temporally accumulated)

// UAV outputs
RWTexture2D<float4> colorOutput : register(u0); // voxelRT_ (shadow applied in-place)
RWTexture2D<float>  aoOutput    : register(u1); // raw AO factor (blurred separately)

// Push constants
struct ShadowPush
{
    uint  width;          // render target width in pixels (dispatch bounds check)
    uint  height;         // render target height in pixels (dispatch bounds check)
    float normalBias;     // ray origin offset along the surface normal
    float shadowMaxDist;  // TMax of the shadow rays
    uint  debugMode;      // 0=normal, 1=debug shadows, 2=debug AO
    float aoRadius;       // max distance for AO rays (e.g. 8.0 voxels)
    uint  aoRayCount;     // number of hemisphere rays (e.g. 6); 0 disables AO tracing
    float aoStrength;     // AO darkening at full occlusion (e.g. 0.35 -> AO factor 0.65)
    uint  frameIndex;     // for temporal rotation of noise pattern
    uint  historyValid;   // 0 = no history (first frame), 1 = blend with history
    uint  pad[2];         // padding — presumably mirrors the CPU-side struct; TODO confirm
};

[[vk::push_constant]] ConstantBuffer<ShadowPush> push : register(b999);

// ── Interleaved Gradient Noise (Jorge Jimenez, 2014) ────────────
// Screen-space low-frequency noise with excellent spectral properties.
// Combined with Cranley-Patterson rotation per frame for temporal variation.
// pixelCoord: pixel position (integer coordinates stored as float).
// Returns a pseudo-random value in [0, 1).
float interleavedGradientNoise(float2 pixelCoord)
{
    return frac(52.9829189 * frac(dot(pixelCoord, float2(0.06711056, 0.00583715))));
}

// Golden ratio conjugate (1/φ), used as the Cranley-Patterson rotation step
static const float GOLDEN_RATIO = 0.618033988749895;

// ── Tuning constants for this pass ──────────────────────────────
static const float RT_TWO_PI            = 6.28318530718;
static const float RT_SHADOWED_FACTOR   = 0.45;  // shadowFactor of a fully shadowed pixel
static const uint  RT_SHADOW_RAY_COUNT  = 2;     // jittered sun rays per pixel
static const float RT_SHADOW_CONE       = 0.04;  // tangent-space jitter radius (~2.3° cone = soft sun)
static const float RT_SHADOW_TMIN       = 0.01;
static const float RT_AO_TMIN           = 0.05;
static const float RT_HISTORY_BLEND     = 0.05;  // weight of the current frame; ~20 frames accumulated
// The frame counter is wrapped before being converted to float: an unbounded
// float(frameIndex) * GOLDEN_RATIO loses its fractional bits as the counter grows,
// which would quantise the per-frame rotation.
static const uint  RT_NOISE_FRAME_MASK  = 1023u;

// ── Hash (kept for voxel-coord seed) ────────────────────────────
// Mixes two 32-bit values into one hash value.
uint hashU(uint a, uint b)
{
    a ^= b * 0x9E3779B9u;
    a ^= a >> 16;
    a *= 0x45d9f3bu;
    return a;
}

// Scrambles x and maps its low 24 bits to a float in [0, 1] (both ends inclusive).
float hashF(uint x)
{
    x ^= x >> 16;
    x *= 0x45d9f3bu;
    x ^= x >> 16;
    return float(x & 0xFFFFFF) / float(0xFFFFFF);
}

// Build orthonormal basis from normal (Frisvad's method).
// N is expected to be unit length; T and B receive the two tangent vectors.
void buildBasis(float3 N, out float3 T, out float3 B)
{
    if (N.z < -0.9999)
    {
        // Near N = (0, 0, -1) the 1 / (1 + N.z) term blows up; use a fixed frame.
        T = float3(0, -1, 0);
        B = float3(-1, 0, 0);
    }
    else
    {
        float a = 1.0 / (1.0 + N.z);
        float b = -N.x * N.y * a;
        T = float3(1.0 - N.x * N.x * a, b, -N.x);
        B = float3(b, 1.0 - N.y * N.y * a, -N.y);
    }
}

// Cosine-weighted hemisphere sample (probability ∝ cos(θ)).
// u1, u2: random numbers in [0, 1); N/T/B: orthonormal frame with N as the pole.
float3 cosineSampleHemisphere(float u1, float u2, float3 N, float3 T, float3 B)
{
    float r = sqrt(u1);
    float phi = RT_TWO_PI * u2;
    float x = r * cos(phi);
    float y = r * sin(phi);
    float z = sqrt(max(0.0, 1.0 - u1));
    return normalize(x * T + y * B + z * N);
}

// Per-frame Cranley-Patterson offset in [0, 1): frac(frame * ratePerFrame),
// evaluated on the wrapped frame index so float precision stays constant over time.
float frameRotationOffset(float ratePerFrame)
{
    return frac(float(push.frameIndex & RT_NOISE_FRAME_MASK) * ratePerFrame);
}

// Traces RT_SHADOW_RAY_COUNT jittered rays toward the sun (direction L, unit length)
// and returns the shadow factor: 1.0 = fully lit, RT_SHADOWED_FACTOR = fully shadowed.
float traceSunShadow(float3 origin, float3 L, uint2 pixel)
{
    // A ray with TMax <= TMin is invalid; treat a degenerate range as "no shadow".
    if (push.shadowMaxDist <= RT_SHADOW_TMIN)
        return 1.0;

    // Basis around the sun direction for the jitter cone
    float3 sunT, sunB;
    buildBasis(L, sunT, sunB);

    const float ignBase = interleavedGradientNoise(float2(pixel));
    const float rotA = frameRotationOffset(GOLDEN_RATIO);
    const float rotB = frameRotationOffset(GOLDEN_RATIO * 0.7);

    float hits = 0.0;
    [loop]
    for (uint si = 0; si < RT_SHADOW_RAY_COUNT; si++)
    {
        // Per-ray noise with temporal variation
        float xi1 = frac(ignBase + rotA + float(si) * GOLDEN_RATIO);
        float xi2 = frac(ignBase * 1.7 + rotB + float(si) * 0.3819);

        // Uniform disk → cone direction
        float r = sqrt(xi1) * RT_SHADOW_CONE;
        float phi = RT_TWO_PI * xi2;
        float3 jitteredL = normalize(L + r * cos(phi) * sunT + r * sin(phi) * sunB);

        RayDesc ray;
        ray.Origin = origin;
        ray.Direction = jitteredL;
        ray.TMin = RT_SHADOW_TMIN;
        ray.TMax = push.shadowMaxDist;

        // Only hit/miss is consumed, so traversal may stop at the first committed hit.
        RayQuery<RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH> q;
        q.TraceRayInline(tlas, RAY_FLAG_NONE, 0xFF, ray);
        [loop] while (q.Proceed()) {}

        if (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT)
            hits += 1.0;
    }

    float shadowAmount = hits / float(RT_SHADOW_RAY_COUNT); // 0=fully lit, 1=fully shadowed
    return lerp(1.0, RT_SHADOWED_FACTOR, shadowAmount);
}

// Traces push.aoRayCount cosine-weighted hemisphere rays around N and returns the
// AO factor: 1.0 = unoccluded, (1 - push.aoStrength) = every ray blocked at distance 0.
float traceAmbientOcclusion(float3 origin, float3 N, uint2 pixel)
{
    const uint rayCount = push.aoRayCount;
    // No rays requested, or a radius that would produce an invalid ray (TMax <= TMin)
    // and a division by ~0 in the falloff below.
    if (rayCount == 0 || push.aoRadius <= RT_AO_TMIN)
        return 1.0;

    float3 T, B;
    buildBasis(N, T, B);

    // Cranley-Patterson rotation: offset by golden ratio * frameIndex.
    // Each frame explores different ray directions → temporal accumulation converges.
    const float frameRotation = frameRotationOffset(GOLDEN_RATIO);

    float totalOcclusion = 0.0;
    [loop]
    for (uint i = 0; i < rayCount; i++)
    {
        // Each ray samples the IGN at a different pixel offset to decorrelate rays
        float2 rayPixel = float2(pixel) + float2(float(i) * 7.0, float(i) * 3.0);
        float u1 = frac(interleavedGradientNoise(rayPixel)
                        + frameRotation + float(i) * GOLDEN_RATIO);
        float u2 = frac(interleavedGradientNoise(rayPixel + float2(47.0, 17.0))
                        + frameRotation + float(i) * 0.381966011250105); // 1/φ²

        RayDesc aoRay;
        aoRay.Origin = origin;
        aoRay.Direction = cosineSampleHemisphere(u1, u2, N, T, B);
        aoRay.TMin = RT_AO_TMIN;
        aoRay.TMax = push.aoRadius;

        // The hit distance is used for the falloff, so the closest hit is required.
        RayQuery<RAY_FLAG_NONE> aoQ;
        aoQ.TraceRayInline(tlas, RAY_FLAG_NONE, 0xFF, aoRay);
        [loop] while (aoQ.Proceed()) {}

        if (aoQ.CommittedStatus() == COMMITTED_TRIANGLE_HIT)
        {
            // Quadratic distance falloff: nearby occluders count more
            float falloff = 1.0 - saturate(aoQ.CommittedRayT() / push.aoRadius);
            totalOcclusion += falloff * falloff;
        }
    }

    float occlusionRatio = totalOcclusion / float(rayCount);
    return 1.0 - occlusionRatio * push.aoStrength;
}

// Blends the current AO value with last frame's AO at the reprojected position.
// Returns currentAO unchanged when there is no usable history for this pixel.
// NOTE(review): there is no depth/normal rejection, so disoccluded pixels reuse
// whatever history lies at the reprojected location.
float accumulateAO(float3 worldPos, float currentAO)
{
    if (push.historyValid == 0)
        return currentAO;

    float4 prevClip = mul(prevViewProjection, float4(worldPos, 1.0));

    // w <= 0 means the point was behind the previous camera; the perspective divide
    // would mirror it back onto the screen and fetch unrelated history.
    if (prevClip.w > 0.0)
    {
        float2 prevNDC = prevClip.xy / prevClip.w;
        float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 0.5 - prevNDC.y * 0.5);

        // Accept only reprojections that land inside the screen
        if (all(prevUV >= 0.0) && all(prevUV < 1.0))
        {
            int2 prevPixel = int2(prevUV * float2(push.width, push.height));
            float historyAO = aoHistory.Load(int3(prevPixel, 0)).r;

            // Low alpha = keep more history (smoother), high alpha = more responsive
            return lerp(historyAO, currentAO, RT_HISTORY_BLEND);
        }
    }
    return currentAO;
}

// Entry point: one thread per pixel. Writes the (temporally accumulated) AO factor
// to aoOutput and applies the sun shadow to colorOutput in place.
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 1)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    const uint2 pixel = DTid.xy;
    if (pixel.x >= push.width || pixel.y >= push.height)
        return;

    // Depth of exactly 0 is treated as "nothing to shade"
    // (presumably the cleared/far value of a reverse-Z buffer — confirm).
    float depth = depthTexture[pixel].r;
    if (depth == 0.0)
    {
        aoOutput[pixel] = 1.0;
        if (push.debugMode > 0)
            colorOutput[pixel] = float4(0.1, 0.1, 0.1, 1);
        return;
    }

    // Reconstruct world position from depth via inverse VP
    float2 uv = (float2(pixel) + 0.5) / float2(push.width, push.height);
    float2 ndc = float2(uv.x * 2.0 - 1.0, (1.0 - uv.y) * 2.0 - 1.0);
    float4 worldPos4 = mul(inverseViewProjection, float4(ndc, depth, 1.0));
    float3 worldPos = worldPos4.xyz / worldPos4.w;

    float3 N = normalTexture[pixel].xyz;
    // Push the ray origin off the surface to avoid self-intersection
    float3 origin = worldPos + N * push.normalBias;

    // ── Soft shadow: jittered rays toward sun ───────────────────
    float3 L = normalize(-sunDirection.xyz);
    float NdotL = dot(N, L);
    float shadowFactor;
    if (NdotL <= 0.0)
        shadowFactor = RT_SHADOWED_FACTOR; // back-facing = fully in shadow, no rays needed
    else
        shadowFactor = traceSunShadow(origin, L, pixel);

    // ── AO: hemisphere rays with IGN + temporal rotation ────────
    float aoFactor = traceAmbientOcclusion(origin, N, pixel);

    // ── Temporal accumulation ───────────────────────────────────
    aoFactor = accumulateAO(worldPos, aoFactor);

    // ── Write AO to separate buffer (will be blurred), apply shadow in-place ──
    aoOutput[pixel] = aoFactor;

    if (push.debugMode == 1)
    {
        // Shadow debug view: blue = back-facing, red = any shadow ray hit, green = lit
        if (NdotL <= 0.0)
            colorOutput[pixel] = float4(0, 0, 0.5, 1);
        else if (shadowFactor < 1.0)
            colorOutput[pixel] = float4(1, 0, 0, 1);
        else
            colorOutput[pixel] = float4(0, 1, 0, 1);
    }
    else if (push.debugMode == 2)
    {
        // AO debug view: the color target is set to white here
        // (presumably so a later pass that applies aoOutput shows AO alone — confirm).
        colorOutput[pixel] = float4(1, 1, 1, 1);
    }
    else
    {
        float4 color = colorOutput[pixel];
        // Colored shadows: lerp toward shadow tint instead of just darkening.
        // shadowFactor=1 → no change; shadowFactor=RT_SHADOWED_FACTOR → strongest blend.
        float shadowAmount = 1.0 - shadowFactor; // 0=lit, (1 - RT_SHADOWED_FACTOR)=full shadow
        float3 tintedColor = color.rgb * shadowTint.rgb; // shadow = original × tint color
        color.rgb = lerp(color.rgb, tintedColor, shadowAmount);
        colorOutput[pixel] = color;
    }
}