bvle-voxels/shaders/voxelShadowCS.hlsl
Samuel Bouchet 3d0c4f2f80 Phase 4.2+7: grass blade rework + soft RT shadows + toping BLAS optimization
Grass blades:
- Leaf-shaped profile (4 sections: base→belly→taper→tip) instead of spiky triangles
- Wider blades (base 0.055-0.095), more spacing between blades (±0.07 scatter)
- Natural green texture (50,140,35 → 80,180,55) instead of neon lime
- Reduced warm shift and removed artificial saturation boost
- Side faces at 60% brightness (dark green) instead of 38% (near-black)

Soft RT shadows:
- 2 jittered shadow rays per pixel with IGN+Cranley-Patterson temporal variation
- 2.3° cone around sun direction for soft penumbra
- Gradual shadow factor (0-100%) instead of binary on/off

Performance:
- Toping BLAS removed from TLAS (23M+ tris caused massive ray traversal slowdown)
- Toping BLAS position/index buffer construction skipped entirely
- Shadow rays reduced from 4 to 2 (temporal accumulation compensates)
2026-03-29 19:46:25 +02:00

241 lines
9.7 KiB
HLSL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// BVLE Voxels - RT Shadow + AO Compute Shader (Phase 6.2 + 6.3)
// Per-pixel: traces 2 jittered shadow rays toward the sun (soft penumbra) + N hemisphere rays for AO.
// Temporal accumulation: blends current AO with reprojected history.
// Modulates voxelRT_ in-place via RWTexture2D.
#include "voxelCommon.hlsli"
// SRV bindings
// Scene depth. main() treats a value of exactly 0.0 as "no geometry" and skips the pixel.
Texture2D<float> depthTexture : register(t0); // voxelDepth_ (D32_FLOAT as R32_FLOAT SRV)
// Surface normal per pixel. main() uses .xyz as-is (no renormalisation) for the ray-origin
// bias, the N·L test and the AO hemisphere basis.
Texture2D<float4> normalTexture : register(t1); // voxelNormalRT_ (R16G16B16A16_SNORM)
// Acceleration structure traced by both the shadow rays and the AO rays (instance mask 0xFF).
RaytracingAccelerationStructure tlas : register(t2); // TLAS with blocky + smooth instances
// Read at the reprojected pixel and blended with this frame's AO when push.historyValid != 0.
Texture2D<float> aoHistory : register(t3); // previous frame's AO (temporally accumulated)
// UAV outputs
// Read-modify-write: the existing colour is lerped toward colour * shadowTint.
RWTexture2D<float4> colorOutput : register(u0); // voxelRT_ (shadow applied in-place)
// AO factor written by this pass (already blended with history when history is valid).
RWTexture2D<float> aoOutput : register(u1); // raw AO factor (blurred separately)
// Push constants
// Per-dispatch parameters for the shadow + AO pass. The field order and sizes must match
// what the host uploads (the host-side struct is not visible from this file).
struct ShadowPush {
uint width; // render target width in pixels (bounds check + UV reconstruction)
uint height; // render target height in pixels (bounds check + UV reconstruction)
float normalBias; // ray origins are pushed off the surface by N * normalBias
float shadowMaxDist; // TMax of the sun shadow rays
uint debugMode; // 0=normal, 1=debug shadows, 2=debug AO (colour output forced to white)
float aoRadius; // max distance for AO rays (e.g. 8.0 voxels); also normalises the distance falloff
uint aoRayCount; // number of hemisphere rays (e.g. 6); 0 skips AO tracing entirely
float aoStrength; // AO factor = 1 - occlusion * aoStrength (e.g. 0.35 → factor bottoms out at 0.65)
uint frameIndex; // for temporal rotation of noise pattern
uint historyValid; // 0 = no history (first frame), 1 = blend with history
// Not read by this shader. NOTE(review): presumably pads the block to 48 bytes; array
// elements are 16-byte strided under D3D cbuffer packing — confirm against the host struct.
uint pad[2];
};
// Bound as a Vulkan push constant. NOTE(review): b999 is presumably the root-constant slot
// declared in VOXEL_ROOTSIG for the D3D path — confirm in voxelCommon.hlsli.
[[vk::push_constant]] ConstantBuffer<ShadowPush> push : register(b999);
// ── Interleaved Gradient Noise (Jorge Jimenez, 2014) ────────────
// Screen-space low-frequency noise with excellent spectral properties.
// Combined with Cranley-Patterson rotation per frame for temporal variation.
float interleavedGradientNoise(float2 pixelCoord) {
    // Project the pixel coordinate onto the IGN gradient and keep the fractional part...
    const float2 gradient = float2(0.06711056, 0.00583715);
    const float projected = frac(dot(pixelCoord, gradient));
    // ...then scramble it with the second magic constant. Result is in [0, 1).
    return frac(52.9829189 * projected);
}
// Golden ratio conjugate (1/φ = φ - 1 ≈ 0.618), used as the Cranley-Patterson rotation step.
// Only the fractional part matters to the callers, which wrap the result with frac().
static const float GOLDEN_RATIO = 0.618033988749895;
// ── Hash (kept for voxel-coord seed) ────────────────────────────
uint hashU(uint a, uint b) {
    // Fold b into a through a golden-ratio multiply, then one xorshift + multiply round.
    uint mixed = a ^ (b * 0x9E3779B9u);
    mixed = mixed ^ (mixed >> 16);
    return mixed * 0x45d9f3bu;
}
float hashF(uint x) {
    // xorshift → multiply → xorshift avalanche of the input bits
    uint bits = x ^ (x >> 16);
    bits *= 0x45d9f3bu;
    bits ^= bits >> 16;
    // Keep the low 24 bits (exactly representable in a float) and map them to [0, 1]
    const uint low24 = bits & 0xFFFFFF;
    return float(low24) / float(0xFFFFFF);
}
// Builds a tangent frame (T, B) around N using Frisvad's construction.
// N is expected to be unit length. The closed form divides by (1 + N.z), so normals
// pointing almost exactly down -Z take a fixed fallback frame instead.
void buildBasis(float3 N, out float3 T, out float3 B) {
    // Singular direction: hand back a constant frame and stop
    if (N.z < -0.9999) {
        T = float3(0, -1, 0);
        B = float3(-1, 0, 0);
        return;
    }

    const float invDenom = 1.0 / (1.0 + N.z);
    const float offDiag = -N.x * N.y * invDenom; // shared off-diagonal term of T and B
    T = float3(1.0 - N.x * N.x * invDenom, offDiag, -N.x);
    B = float3(offDiag, 1.0 - N.y * N.y * invDenom, -N.y);
}
// Cosine-weighted hemisphere sample (probability ∝ cos(θ))
float3 cosineSampleHemisphere(float u1, float u2, float3 N, float3 T, float3 B) {
    // u1 → squared disk radius, u2 → azimuth: a point on the unit disk lifted onto
    // the hemisphere around N, expressed in the (T, B, N) frame.
    const float diskRadius = sqrt(u1);
    const float azimuth = 6.28318530718 * u2;
    const float height = sqrt(max(0.0, 1.0 - u1)); // max() guards against u1 slightly above 1

    const float tangentAmount = diskRadius * cos(azimuth);
    const float bitangentAmount = diskRadius * sin(azimuth);
    return normalize(tangentAmount * T + bitangentAmount * B + height * N);
}
// Entry point: one thread per pixel, dispatched in 8x8 groups.
//   1. Reconstructs the world position from depth and offsets it along the normal.
//   2. Traces jittered shadow rays toward the sun → shadowFactor in [0.45, 1].
//   3. Traces push.aoRayCount cosine-weighted hemisphere rays → aoFactor.
//   4. Blends aoFactor with the reprojected previous-frame AO when history is valid.
//   5. Writes AO to aoOutput and applies the tinted shadow to colorOutput in place
//      (or writes debug colours, depending on push.debugMode).
[RootSignature(VOXEL_ROOTSIG)]
[numthreads(8, 8, 1)]
void main(uint3 DTid : SV_DispatchThreadID) {
    // Threads of partial edge groups fall outside the image
    if (DTid.x >= push.width || DTid.y >= push.height) return;

    const uint2 pixel = DTid.xy;

    // depth == 0 is treated as "no geometry": neutral AO, colour untouched (grey in debug modes)
    float depth = depthTexture[pixel];
    if (depth == 0.0) {
        aoOutput[pixel] = 1.0;
        if (push.debugMode > 0) colorOutput[pixel] = float4(0.1, 0.1, 0.1, 1);
        return;
    }

    // Reconstruct world position from depth via inverse VP (pixel centre, Y flipped for NDC)
    float2 uv = (float2(pixel) + 0.5) / float2(push.width, push.height);
    float2 ndc = float2(uv.x * 2.0 - 1.0, (1.0 - uv.y) * 2.0 - 1.0);
    float4 clipPos = float4(ndc, depth, 1.0);
    float4 worldPos4 = mul(inverseViewProjection, clipPos);
    float3 worldPos = worldPos4.xyz / worldPos4.w;

    float3 N = normalTexture[pixel].xyz;
    // Lift the ray origin off the surface to avoid self-intersection
    float3 origin = worldPos + N * push.normalBias;

    // Per-frame Cranley-Patterson rotation shared by the shadow and AO samplers.
    // The frame counter is wrapped BEFORE the float conversion: float(frameIndex) * 0.618
    // loses fractional bits as frameIndex grows (ULP ≈ 0.06 after ~1M frames), which would
    // collapse the temporal jitter to a handful of distinct values. Values for the first
    // 1024 frames are unchanged.
    const float frameRotation = float(push.frameIndex & 1023u) * GOLDEN_RATIO;

    // Colour-blend factor for a fully shadowed pixel (1.0 = fully lit)
    const float shadowFloor = 0.45;

    // ── Soft shadow: multiple jittered rays toward sun ─────────
    float3 L = normalize(-sunDirection.xyz);
    float NdotL = dot(N, L);
    float shadowFactor = 1.0;
    if (NdotL <= 0.0) {
        shadowFactor = shadowFloor; // back-facing = fully in shadow, no rays needed
    } else {
        // Build basis around sun direction for jitter cone
        float3 sunT, sunB;
        buildBasis(L, sunT, sunB);

        // 2 shadow rays with IGN-based jitter (soft penumbra, varied per frame)
        const uint shadowRays = 2;
        const float coneAngle = 0.04; // ~2.3° cone = soft sun
        float shadowHits = 0;
        float ignBase = interleavedGradientNoise(float2(pixel));

        [loop]
        for (uint si = 0; si < shadowRays; si++) {
            // Per-ray noise with temporal variation
            float xi1 = frac(ignBase + frameRotation + float(si) * GOLDEN_RATIO);
            float xi2 = frac(ignBase * 1.7 + frameRotation * 0.7 + float(si) * 0.3819);

            // Uniform disk → cone direction
            float r = sqrt(xi1) * coneAngle;
            float phi = 6.28318530718 * xi2;
            float3 jitteredL = normalize(L + r * cos(phi) * sunT + r * sin(phi) * sunB);

            RayDesc ray;
            ray.Origin = origin;
            ray.Direction = jitteredL;
            ray.TMin = 0.01;
            ray.TMax = push.shadowMaxDist;

            // Any hit is enough for occlusion, so stop at the first accepted triangle
            RayQuery<RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH> q;
            q.TraceRayInline(tlas, 0, 0xFF, ray);
            [loop] while (q.Proceed()) {}
            if (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT) {
                shadowHits += 1.0;
            }
        }

        float occludedFraction = shadowHits / float(shadowRays); // 0=fully lit, 1=fully shadowed
        shadowFactor = lerp(1.0, shadowFloor, occludedFraction);
    }

    // ── AO: hemisphere rays with IGN + temporal rotation ──────
    float aoFactor = 1.0;
    uint rayCount = push.aoRayCount;
    if (rayCount > 0) {
        float3 T, B;
        buildBasis(N, T, B);

        // IGN base noise: well-distributed screen-space pattern.
        // Each frame explores different ray directions → temporal accumulation converges.
        float totalOcclusion = 0.0;
        [loop]
        for (uint i = 0; i < rayCount; i++) {
            // Per-ray IGN with spatial offset to decorrelate rays:
            // each ray looks up the noise at a different pixel offset
            float2 rayPixel = float2(pixel) + float2(i * 7.0, i * 3.0);
            float ign = interleavedGradientNoise(rayPixel);

            // Cranley-Patterson rotation: shift by golden ratio per frame + per ray
            float u1 = frac(ign + frameRotation + float(i) * GOLDEN_RATIO);
            float u2 = frac(interleavedGradientNoise(rayPixel + float2(47.0, 17.0))
                            + frameRotation + float(i) * 0.381966011250105); // 1/φ²
            float3 dir = cosineSampleHemisphere(u1, u2, N, T, B);

            RayDesc aoRay;
            aoRay.Origin = origin;
            aoRay.Direction = dir;
            aoRay.TMin = 0.05;
            aoRay.TMax = push.aoRadius;

            RayQuery<RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH> aoQ;
            aoQ.TraceRayInline(tlas, 0, 0xFF, aoRay);
            [loop] while (aoQ.Proceed()) {}
            if (aoQ.CommittedStatus() == COMMITTED_TRIANGLE_HIT) {
                // Nearby occluders count more: quadratic falloff over the AO radius
                float hitT = aoQ.CommittedRayT();
                float falloff = 1.0 - saturate(hitT / push.aoRadius);
                totalOcclusion += falloff * falloff;
            }
        }

        float occlusionRatio = totalOcclusion / float(rayCount);
        aoFactor = 1.0 - occlusionRatio * push.aoStrength;
    }

    // ── Temporal accumulation ────────────────────────────────────
    // Reproject current pixel to previous frame's screen space
    if (push.historyValid != 0) {
        float4 prevClip = mul(prevViewProjection, float4(worldPos, 1.0));
        // Points at or behind the previous camera plane have w <= 0. The perspective divide
        // would mirror them back onto the screen and blend in unrelated history, so skip them.
        if (prevClip.w > 0.0) {
            float2 prevNDC = prevClip.xy / prevClip.w;
            float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 0.5 - prevNDC.y * 0.5);

            // Check if reprojected UV is within screen bounds
            if (prevUV.x >= 0.0 && prevUV.x < 1.0 && prevUV.y >= 0.0 && prevUV.y < 1.0) {
                int2 prevPixel = int2(prevUV * float2(push.width, push.height));
                float historyAO = aoHistory.Load(int3(prevPixel, 0));

                // Blend: low alpha = keep more history (smoother), high alpha = more responsive
                float blendAlpha = 0.05; // accumulate ~20 frames
                aoFactor = lerp(historyAO, aoFactor, blendAlpha);
            }
        }
    }

    // ── Write AO to separate buffer (will be blurred), apply shadow in-place ──
    aoOutput[pixel] = aoFactor;

    if (push.debugMode == 1) {
        // Shadow debug: blue = back-facing, red = at least one shadow ray hit, green = fully lit
        if (NdotL <= 0.0)
            colorOutput[pixel] = float4(0, 0, 0.5, 1);
        else if (shadowFactor < 1.0)
            colorOutput[pixel] = float4(1, 0, 0, 1);
        else
            colorOutput[pixel] = float4(0, 1, 0, 1);
    } else if (push.debugMode == 2) {
        // AO debug: colour forced to white. NOTE(review): presumably so a later pass that
        // applies aoOutput shows the AO term alone — confirm against the composite pass.
        colorOutput[pixel] = float4(1, 1, 1, 1);
    } else {
        float4 color = colorOutput[pixel];
        // Colored shadows: lerp toward shadow tint instead of just darkening.
        // shadowFactor = 1 → no change; shadowFactor = shadowFloor → strongest tint blend.
        float tintWeight = 1.0 - shadowFactor; // 0 = lit, 1 - shadowFloor (0.55) = full shadow
        float3 tintedColor = color.rgb * shadowTint.rgb; // shadow = original × tint color
        color.rgb = lerp(color.rgb, tintedColor, tintWeight);
        colorOutput[pixel] = color;
    }
}