bvle-voxels/shaders/voxelShadowCS.hlsl
Samuel Bouchet 82307269e8 Phase 7.1 tuning: reduce saturation, increase contrast, multi-angle screenshots
- Saturation 1.40→1.15, exposure 2.2→1.8 (less oversaturated)
- Shadow factor 0.55→0.45 (more contrast between lit and shadow)
- Ambient reduced slightly for better contrast
- Screenshot mode: 4 camera views (landscape, sideview, topdown, backlit)
- AO history reset between view changes (prevents temporal contamination)
2026-03-29 15:11:42 +02:00

215 lines
8.5 KiB
HLSL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// BVLE Voxels - RT Shadow + AO Compute Shader (Phase 6.2 + 6.3)
// Per-pixel: traces 1 shadow ray toward sun + N hemisphere rays for AO.
// Temporal accumulation: blends current AO with reprojected history.
// Modulates voxelRT_ in-place via RWTexture2D.
#include "voxelCommon.hlsli"
// SRV bindings
// Per-pixel scene depth. main() treats a value of exactly 0.0 as "no geometry" and skips the pixel.
Texture2D<float> depthTexture : register(t0); // voxelDepth_ (D32_FLOAT as R32_FLOAT SRV)
// Surface normal; only .xyz is read and it is used as-is (no renormalisation in this shader).
Texture2D<float4> normalTexture : register(t1); // voxelNormalRT_ (R16G16B16A16_SNORM)
// Acceleration structure traced by both the shadow ray and the AO rays (instance mask 0xFF).
RaytracingAccelerationStructure tlas : register(t2); // TLAS with blocky + smooth instances
// Sampled at the reprojected pixel position when push.historyValid is non-zero.
Texture2D<float> aoHistory : register(t3); // previous frame's AO (temporally accumulated)
// UAV outputs
// Read-modify-write: the existing colour is loaded, shadow-tinted and stored back
// (or overwritten with a debug colour when push.debugMode is non-zero).
RWTexture2D<float4> colorOutput : register(u0); // voxelRT_ (shadow applied in-place)
// Receives the (temporally blended) AO factor; 1.0 is written for no-geometry pixels.
RWTexture2D<float> aoOutput : register(u1); // raw AO factor (blurred separately)
// Push constants
// Per-dispatch parameters of the shadow + AO pass: twelve 32-bit values (48 bytes).
// NOTE(review): the host-side struct is not visible from this file; its field order
// and padding presumably mirror this one exactly -- confirm before reordering anything.
struct ShadowPush {
uint width; // output width in pixels; threads with DTid.x >= width exit immediately
uint height; // output height in pixels; threads with DTid.y >= height exit immediately
float normalBias; // distance the ray origin is pushed along the surface normal
float shadowMaxDist; // TMax of the shadow ray traced toward the sun
uint debugMode; // 0=normal, 1=debug shadows (blue/red/green classification), 2=debug AO (base colour forced to white); any non-zero value paints no-geometry pixels dark grey
float aoRadius; // TMax of the AO rays and the distance that normalises the hit falloff (e.g. 8.0 voxels)
uint aoRayCount; // number of hemisphere rays per pixel (e.g. 6); 0 skips AO tracing
float aoStrength; // scales the mean occlusion: aoFactor = 1 - occlusion * aoStrength (e.g. 0.35 -> fully occluded pixels get an AO factor of 0.65)
uint frameIndex; // for temporal rotation of noise pattern
uint historyValid; // 0 = no history (first frame), non-zero = blend with aoHistory
uint pad[2]; // not read by this shader; presumably padding to match the host-side layout -- confirm
};
// Instance through which the shader reads the parameters above.
// NOTE(review): b999 is presumably the slot the shared root signature reserves for
// push constants -- confirm against VOXEL_ROOTSIG in voxelCommon.hlsli.
[[vk::push_constant]] ConstantBuffer<ShadowPush> push : register(b999);
// ── Interleaved Gradient Noise (Jorge Jimenez, 2014) ────────────
// Maps a pixel coordinate to a pseudo-random scalar in [0, 1).
// The pattern is purely a function of screen position; callers add their own
// per-frame / per-ray offsets to vary it over time.
float interleavedGradientNoise(float2 pixelCoord) {
    const float2 gradientWeights = float2(0.06711056, 0.00583715);
    const float wrappedDot = frac(dot(pixelCoord, gradientWeights));
    return frac(52.9829189 * wrappedDot);
}
// Golden ratio conjugate (1/phi = phi - 1 ~= 0.618), i.e. the fractional part of the
// golden ratio. Used as the step of the Cranley-Patterson rotation applied to the AO
// sample sequence per frame and per ray.
static const float GOLDEN_RATIO = 0.618033988749895;
// ── Hash (kept for voxel-coord seed) ────────────────────────────
// Mixes two 32-bit integers into one 32-bit hash value.
// 'b' is scrambled by a multiplicative constant and folded into 'a', then the
// result goes through one xor-shift / multiply round.
uint hashU(uint a, uint b) {
    uint mixed = a ^ (b * 0x9E3779B9u);
    mixed ^= (mixed >> 16);
    return mixed * 0x45d9f3bu;
}
// Hashes a 32-bit integer to a float in [0, 1].
// One xor-shift / multiply / xor-shift round, then the low 24 bits are scaled
// by 1 / (2^24 - 1).
float hashF(uint x) {
    uint bits = x ^ (x >> 16);
    bits *= 0x45d9f3bu;
    bits ^= (bits >> 16);
    const uint low24 = bits & 0xFFFFFF;
    return float(low24) / float(0xFFFFFF);
}
// Builds a tangent frame (T, B) around the normal N using Frisvad's closed form.
// N is expected to be unit length. The closed form divides by (1 + N.z), so when
// N points (almost) exactly along -Z a fixed frame is returned instead.
void buildBasis(float3 N, out float3 T, out float3 B) {
    // Singular direction: 1 + N.z would be ~0.
    if (N.z < -0.9999) {
        T = float3(0, -1, 0);
        B = float3(-1, 0, 0);
        return;
    }

    const float invOnePlusZ = 1.0 / (1.0 + N.z);
    const float xyTerm = -N.x * N.y * invOnePlusZ;
    T = float3(1.0 - N.x * N.x * invOnePlusZ, xyTerm, -N.x);
    B = float3(xyTerm, 1.0 - N.y * N.y * invOnePlusZ, -N.y);
}
// Cosine-weighted hemisphere sample (probability proportional to cos(theta)).
//   u1, u2  : random numbers; u1 selects the squared disk radius, u2 the azimuth
//   N, T, B : frame of the hemisphere (normal, tangent, bitangent)
// Returns a normalised direction: a point on the unit disk is picked and lifted
// onto the hemisphere around N.
float3 cosineSampleHemisphere(float u1, float u2, float3 N, float3 T, float3 B) {
    const float azimuth = 6.28318530718 * u2;
    const float radius = sqrt(u1);
    const float2 disk = float2(radius * cos(azimuth), radius * sin(azimuth));
    const float height = sqrt(max(0.0, 1.0 - u1));

    const float3 direction = disk.x * T + disk.y * B + height * N;
    return normalize(direction);
}
// Traces one inline ray against the TLAS and returns true when a triangle hit was committed.
// The query runs with ACCEPT_FIRST_HIT_AND_END_SEARCH, so hitT is the distance of the
// first accepted hit (not necessarily the nearest one). hitT is set to tMax on a miss.
bool voxelShadowTraceFirstHit(float3 origin, float3 dir, float tMin, float tMax, out float hitT) {
    RayDesc ray;
    ray.Origin = origin;
    ray.Direction = dir;
    ray.TMin = tMin;
    ray.TMax = tMax;

    RayQuery<RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH> query;
    query.TraceRayInline(tlas, 0, 0xFF, ray);
    [loop] while (query.Proceed()) {}

    hitT = tMax;
    // Only query the committed T when a hit actually exists.
    if (query.CommittedStatus() == COMMITTED_TRIANGLE_HIT) {
        hitT = query.CommittedRayT();
        return true;
    }
    return false;
}

// Traces push.aoRayCount cosine-weighted hemisphere rays around N and returns the AO
// factor 1 - aoStrength * (mean occlusion). Returns 1.0 (unoccluded) when AO is
// disabled (ray count 0) or when aoRadius does not leave a valid ray interval.
float voxelShadowComputeAO(uint2 pixel, float3 origin, float3 N) {
    const float AO_RAY_TMIN = 0.05;

    const uint rayCount = push.aoRayCount;
    // A ray with TMax < TMin is invalid for a ray query, so a too-small (or NaN)
    // radius is treated as "no AO" instead of being traced.
    if (rayCount == 0 || !(push.aoRadius > AO_RAY_TMIN)) {
        return 1.0;
    }

    float3 T, B;
    buildBasis(N, T, B);

    // Cranley-Patterson rotation: every frame shifts the sample pattern by the golden
    // ratio conjugate so temporal accumulation sees new directions each frame.
    // frameIndex is wrapped BEFORE the float conversion: float(frameIndex) * 0.618
    // loses its fractional bits as the index grows, which would quantise the
    // rotation after long sessions.
    const float frameRotation = frac(float(push.frameIndex & 1023u) * GOLDEN_RATIO);

    float totalOcclusion = 0.0;
    [loop]
    for (uint i = 0; i < rayCount; i++) {
        // Each ray reads the IGN pattern at a different pixel offset to decorrelate rays.
        const float2 rayPixel = float2(pixel) + float2(i * 7.0, i * 3.0);

        const float u1 = frac(interleavedGradientNoise(rayPixel)
                              + frameRotation + float(i) * GOLDEN_RATIO);
        const float u2 = frac(interleavedGradientNoise(rayPixel + float2(47.0, 17.0))
                              + frameRotation + float(i) * 0.381966011250105); // 1/phi^2

        const float3 dir = cosineSampleHemisphere(u1, u2, N, T, B);

        float hitT;
        if (voxelShadowTraceFirstHit(origin, dir, AO_RAY_TMIN, push.aoRadius, hitT)) {
            // Closer hits occlude more; quadratic falloff reaches 0 at aoRadius.
            const float falloff = 1.0 - saturate(hitT / push.aoRadius);
            totalOcclusion += falloff * falloff;
        }
    }

    const float occlusionRatio = totalOcclusion / float(rayCount);
    return 1.0 - occlusionRatio * push.aoStrength;
}

// Blends the freshly traced AO value with the previous frame's accumulated AO, sampled
// at the position worldPos had on screen last frame. Returns currentAO unchanged when no
// history is available or the reprojection is unusable.
float voxelShadowAccumulateAO(float currentAO, float3 worldPos) {
    // Low alpha keeps more history (smoother), high alpha is more responsive.
    const float HISTORY_BLEND_ALPHA = 0.05; // accumulate ~20 frames

    if (push.historyValid == 0) {
        return currentAO;
    }

    const float4 prevClip = mul(prevViewProjection, float4(worldPos, 1.0));
    // w <= 0 means the point was at or behind the previous camera plane. The
    // perspective divide would mirror it back into the screen and fetch unrelated
    // history, so reject it. Written as !(w > 0) so NaN is rejected as well.
    if (!(prevClip.w > 0.0)) {
        return currentAO;
    }

    const float2 prevNDC = prevClip.xy / prevClip.w;
    const float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 0.5 - prevNDC.y * 0.5);

    const bool onScreen = prevUV.x >= 0.0 && prevUV.x < 1.0 &&
                          prevUV.y >= 0.0 && prevUV.y < 1.0;
    if (!onScreen) {
        return currentAO;
    }

    const int2 prevPixel = int2(prevUV * float2(push.width, push.height));
    const float historyAO = aoHistory.Load(int3(prevPixel, 0));
    return lerp(historyAO, currentAO, HISTORY_BLEND_ALPHA);
}

// Entry point: one thread per pixel.
//  1. Reconstructs the world position from depth.
//  2. Traces one shadow ray toward the sun.
//  3. Traces the AO hemisphere rays and blends the result with the AO history.
//  4. Writes the AO factor to aoOutput and applies the shadow tint to colorOutput in place
//     (or writes a debug colour, depending on push.debugMode).
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 1)]
void main(uint3 DTid : SV_DispatchThreadID) {
    // Shadow factor assigned to pixels that face away from the sun or are occluded.
    const float SHADOWED_FACTOR = 0.45;
    const float SHADOW_RAY_TMIN = 0.01;

    const uint2 pixel = DTid.xy;
    if (pixel.x >= push.width || pixel.y >= push.height) return;

    // Depth of exactly 0 marks a pixel without geometry: no occlusion, no shadow.
    const float depth = depthTexture[pixel];
    if (depth == 0.0) {
        aoOutput[pixel] = 1.0;
        if (push.debugMode > 0) colorOutput[pixel] = float4(0.1, 0.1, 0.1, 1);
        return;
    }

    // Reconstruct world position from depth via inverse VP (pixel centre, Y flipped).
    const float2 uv = (float2(pixel) + 0.5) / float2(push.width, push.height);
    const float2 ndc = float2(uv.x * 2.0 - 1.0, (1.0 - uv.y) * 2.0 - 1.0);
    const float4 worldPos4 = mul(inverseViewProjection, float4(ndc, depth, 1.0));
    const float3 worldPos = worldPos4.xyz / worldPos4.w;

    const float3 N = normalTexture[pixel].xyz;
    // Offset along the normal so rays do not re-hit the surface they start on.
    const float3 origin = worldPos + N * push.normalBias;

    // ── Shadow ray toward sun ──────────────────────────────────
    const float3 L = normalize(-sunDirection.xyz);
    const float NdotL = dot(N, L);

    float shadowFactor = 1.0;
    if (NdotL <= 0.0) {
        // Surface faces away from the sun: shadowed without tracing.
        shadowFactor = SHADOWED_FACTOR;
    } else if (push.shadowMaxDist > SHADOW_RAY_TMIN) {
        // Only trace when [TMin, TMax] is a valid interval.
        float shadowHitT; // distance is not needed for the shadow test
        if (voxelShadowTraceFirstHit(origin, L, SHADOW_RAY_TMIN, push.shadowMaxDist, shadowHitT)) {
            shadowFactor = SHADOWED_FACTOR;
        }
    }

    // ── AO: hemisphere rays + temporal accumulation ────────────
    const float tracedAO = voxelShadowComputeAO(pixel, origin, N);
    const float aoFactor = voxelShadowAccumulateAO(tracedAO, worldPos);

    // ── Write AO to separate buffer (will be blurred), apply shadow in-place ──
    aoOutput[pixel] = aoFactor;

    if (push.debugMode == 1) {
        // Shadow classification: blue = faces away from sun, red = occluded, green = lit.
        if (NdotL <= 0.0)
            colorOutput[pixel] = float4(0, 0, 0.5, 1);
        else if (shadowFactor < 1.0)
            colorOutput[pixel] = float4(1, 0, 0, 1);
        else
            colorOutput[pixel] = float4(0, 1, 0, 1);
    } else if (push.debugMode == 2) {
        // AO debug: base colour forced to white.
        // NOTE(review): presumably so the later pass that applies the blurred AO shows
        // AO on its own -- confirm against that pass.
        colorOutput[pixel] = float4(1, 1, 1, 1);
    } else {
        float4 color = colorOutput[pixel];
        // Colored shadows: lerp toward the shadow-tinted colour instead of just darkening.
        // shadowAmount is 0 for lit pixels and 1 - SHADOWED_FACTOR (0.55) for shadowed ones.
        const float shadowAmount = 1.0 - shadowFactor;
        const float3 tintedColor = color.rgb * shadowTint.rgb; // shadow = original x tint colour
        color.rgb = lerp(color.rgb, tintedColor, shadowAmount);
        colorOutput[pixel] = color;
    }
}