From 7f36bdae3837152eca3074c763f4349c999f495d Mon Sep 17 00:00:00 2001 From: Samuel Bouchet Date: Sat, 28 Mar 2026 14:48:11 +0100 Subject: [PATCH] =?UTF-8?q?Phase=206.1:=20RT=20infrastructure=20=E2=80=94?= =?UTF-8?q?=20MRT=20normals=20+=20BLAS/TLAS=20build?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Normal render target (R16G16B16A16_SNORM) as MRT SV_TARGET1 in all 3 pixel shaders (voxelPS, voxelTopingPS, voxelSmoothPS) for future RT shadow/AO - BLAS extraction compute shader (voxelBLASExtractCS.hlsl): converts PackedQuad StructuredBuffer to float3 position buffer for DXR BLAS input - Blocky BLAS: single BLAS from all GPU-meshed quads (~1.5M triangles) - Smooth BLAS: single BLAS from smooth vertex buffer directly - TLAS: 2 instances (blocky + smooth), identity transforms, CreateBuffer2 with callback to avoid UpdateBuffer on RAY_TRACING flagged buffers - Fix: Wicked always accesses index_buffer in CreateRaytracingAccelerationStructure via to_internal() even for non-indexed geometry — provide dummy valid buffer --- CLAUDE.md | 64 +++++- CMakeLists.txt | 3 + shaders/voxelBLASExtractCS.hlsl | 119 +++++++++++ shaders/voxelPS.hlsl | 26 ++- shaders/voxelSmoothPS.hlsl | 13 +- shaders/voxelTopingPS.hlsl | 12 +- src/voxel/VoxelRenderer.cpp | 364 ++++++++++++++++++++++++++++++-- src/voxel/VoxelRenderer.h | 32 ++- 8 files changed, 593 insertions(+), 40 deletions(-) create mode 100644 shaders/voxelBLASExtractCS.hlsl diff --git a/CLAUDE.md b/CLAUDE.md index d0aec5d..4b32534 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,7 +31,8 @@ bvle-voxels/ │ ├── voxelTopingVS.hlsl # Vertex shader topings (instanced vertex pulling, t4/t5) │ ├── voxelTopingPS.hlsl # Pixel shader topings (triplanar + directional lighting) │ ├── voxelSmoothVS.hlsl # Vertex shader smooth Surface Nets (vertex pulling, t6) -│ └── voxelSmoothPS.hlsl # Pixel shader smooth (triplanar + material blending) +│ ├── voxelSmoothPS.hlsl # Pixel shader smooth (triplanar + material blending) +│ └── voxelBLASExtractCS.hlsl # Compute shader BLAS position extraction (Phase 6.1) └── CLAUDE.md ``` @@ -172,7 +173,7 @@ Les shaders custom doivent respecter le **binding model de Wicked Engine** : 8. **SV_VertexID et startVertexLocation — PIÈGE MAJEUR** : - Avec `ExecuteIndirect` (DrawInstancedIndirectCount), `SV_VertexID` **n'inclut PAS de manière fiable** `startVertexLocation` de `D3D12_DRAW_ARGUMENTS`. Observé sur AMD RDNA 2 (RX 5700 XT) : SV_VertexID commence toujours à 0 pour chaque draw, ignorant startVertexLocation. + Avec `ExecuteIndirect` (DrawInstancedIndirectCount), `SV_VertexID` **n'inclut PAS de manière fiable** `startVertexLocation` de `D3D12_DRAW_ARGUMENTS`. Observé sur AMD RDNA 4 (RX 9070 XT) : SV_VertexID commence toujours à 0 pour chaque draw, ignorant startVertexLocation. **Solution** : toujours mettre `startVertexLocation = 0` dans les indirect args, et passer l'offset des quads par un autre canal (push constant + GPUChunkInfo lookup). Ne JAMAIS compter sur `startVertexLocation` pour encoder un offset dans le mega-buffer. @@ -350,7 +351,7 @@ Découpée en sous-phases pour isoler les sources de bugs potentiels : - Le compute shader `voxelMeshCS.hlsl` fait le meshing 1×1 sur GPU (1 thread par voxel, 8×8×8 thread groups) - Benchmark automatique au premier frame après génération du monde (mode CPU fallback) -- Résultats (168 chunks, Ryzen 7 3700X + RX 5700 XT) : +- Résultats (168 chunks, Ryzen 7 9800X3D + RX 9070 XT) : - CPU greedy: 277 ms, 358K quads → greedy merge réduit les quads de 6.8× - GPU baseline (1×1): 5.3 ms, 2.43M quads → 52× plus rapide que CPU - GPU greedy merge non implémenté (pourrait combiner vitesse GPU + réduction de quads) @@ -371,7 +372,7 @@ Découpée en sous-phases pour isoler les sources de bugs potentiels : - **Skip GPU dispatch** : `gpuMeshDirty_` flag empêche le re-dispatch/upload quand rien n'a changé - **Upload conditionnel** : `chunkInfoBuffer_` ne se re-upload que quand `chunkInfoDirty_` - **Animation allégée** : 2 octaves fBm (au lieu de 5) + pas de caves en mode animation (54ms → 8ms) -- **Résultats finaux** (171 chunks, Ryzen 7 3700X + RX 5700 XT, animation 60 Hz) : +- **Résultats finaux** (171 chunks, Ryzen 7 9800X3D + RX 9070 XT, animation 60 Hz) : - Regenerate: 8.7ms (parallèle, 2 octaves) - VoxelPack: 0ms (fusionné dans regenerate) - GPU Upload: 4.5ms (~11 MB voxel data) @@ -507,7 +508,7 @@ Système de biseaux décoratifs (« topings ») sur les faces +Y exposées pour - **`collectInstancesParallel()`** : chaque chunk écrit dans un vecteur local, merge séquentiel. Élimine la contention - **Staging vectors persistants** : `topingSorted_`, `topingGpuInsts_` réutilisés entre frames -**Résultats animation (648 chunks, Ryzen 7 3700X + RX 5700 XT)** : +**Résultats animation (648 chunks, Ryzen 7 9800X3D + RX 9070 XT)** : - SmoothMesh: 560ms → 17ms (parallèle, dilation, cache) - SmoothUpload: 13ms → 4ms (staging persistant) - TopingCollect: 58ms → 6.5ms (parallèle) @@ -527,16 +528,57 @@ Système de biseaux décoratifs (« topings ») sur les faces +Y exposées pour - LOD : réduction de triangles à distance - Pipeline asynchrone : double-buffer GPU resources, CPU frame N prépare pendant que GPU rend frame N-1 -### Phase 6 - Ray tracing hybride [A FAIRE] +### Phase 6 - Ray tracing hybride [EN COURS] -- BLAS par chunk (depuis le mesh greedy), TLAS par frame -- RT Shadows via ray queries (compute shader) -- RT AO (4-8 rayons, courte portée) -- Fallback shadow maps / SSAO si RT non disponible +#### Phase 6.1 - Infrastructure RT (Normal RT + BLAS/TLAS) [FAIT] + +- **Normal render target (MRT)** : `voxelNormalRT_` (R16G16B16A16_SNORM) added as SV_TARGET1 + - All 3 pixel shaders (voxelPS, voxelTopingPS, voxelSmoothPS) output `PSOutput` struct with `SV_TARGET0` (color) + `SV_TARGET1` (world-space normal) + - All 3 render passes (`render`, `renderTopings`, `renderSmooth`) use 3 `RenderPassImage` entries (color + normal + depth) +- **BLAS extraction compute shader** (`voxelBLASExtractCS.hlsl`) : + - Reads `gpuQuadBuffer_` (StructuredBuffer), extracts world-space float3 positions + - 1 thread per quad → 6 vertices (2 triangles), same unpack + winding logic as voxelVS.hlsl + - Output: `blasPositionBuffer_` (RWByteAddressBuffer, raw buffer), non-indexed triangles + - Dispatched after GPU mesh pass, only when quad count changes +- **Blocky BLAS** : single BLAS from `blasPositionBuffer_` (all blocky quads as non-indexed triangles) + - `PREFER_FAST_BUILD` flag for quick rebuilds during animation + - Vertex format: R32G32B32_FLOAT, stride 12 bytes +- **Smooth BLAS** : single BLAS from `gpuSmoothVertexBuffer_` directly (no extraction needed) + - Position at offset 0, stride 32 bytes (SmoothVtx struct) + - Same `PREFER_FAST_BUILD` flag +- **TLAS** : 2 instances (blocky + smooth), identity transforms (all positions are world-space) + - Instance buffer created via `CreateBuffer2` with pre-filled instance data (callback) + - `instance_mask = 0xFF` for both instances + - Recreated each rebuild (avoids `UpdateBuffer` on RAY_TRACING flagged buffers) +- **Lifecycle** : BLAS/TLAS rebuilt when geometry changes (quad count differs from previous frame) + - `rtDirty_` flag triggers rebuild on first frame + - Smooth BLAS auto-recreated when vertex count changes +- **HUD** : RT status line showing TLAS state + triangle counts for blocky/smooth +- **Pièges résolus** : + - **Index buffer obligatoire dans BLAS** : `CreateRaytracingAccelerationStructure` dans Wicked accède TOUJOURS `index_buffer` via `to_internal()` (ligne 4356 de `wiGraphicsDevice_DX12.cpp`), même pour de la géométrie non-indexée. Un `GPUBuffer` par défaut (invalide) cause un null deref à offset 0xd8. Solution : fournir un buffer valide dummy + `index_count = 0` + - **`CreateBuffer2` pour TLAS instance buffer** : les buffers avec `ResourceMiscFlag::RAY_TRACING` ne supportent pas `UpdateBuffer` (state mismatch). Utiliser `CreateBuffer2` avec callback pour pré-remplir les instances à la création + +#### Phase 6.2 - RT Shadows [A FAIRE] + +- Compute shader with inline ray queries (`TraceRayInline`) +- Bind TLAS as SRV, voxelNormalRT_ + voxelDepth_ as input +- Shadow map output (R8_UNORM or similar) +- Sun direction ray: trace from surface point toward light + +#### Phase 6.3 - RT AO [A FAIRE] + +- 4-8 hemisphere rays per pixel, short range +- Cosine-weighted random directions from normal +- Output: AO factor (R8_UNORM) + +#### Phase 6.4 - Fallback [A FAIRE] + +- Shadow maps + SSAO when RT not available +- `CheckCapability(RAYTRACING)` gating ## Métriques cibles et résultats -| Métrique | Cible | Résultat (Ryzen 7 3700X + RX 5700 XT) | +| Métrique | Cible | Résultat (Ryzen 7 9800X3D + RX 9070 XT) | |----------|-------|---------------------------------------| | FPS 1440p | > 60 fps | ✅ 80-110 FPS (anim blocky), 700+ FPS (statique) | | FPS anim smooth+topings | > 15 fps | ✅ 17 FPS (smooth+topings+blocky anim 60Hz) | diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cbf5f9..dee1de2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,6 +63,9 @@ add_custom_command(TARGET BVLEVoxels POST_BUILD $/shaders/hlsl6/voxel/voxelTopingPS.cso $/shaders/hlsl6/voxel/voxelSmoothVS.cso $/shaders/hlsl6/voxel/voxelSmoothPS.cso + $/shaders/hlsl6/voxel/voxelSmoothCentroidCS.cso + $/shaders/hlsl6/voxel/voxelSmoothCS.cso + $/shaders/hlsl6/voxel/voxelBLASExtractCS.cso $/shaders/hlsl6/voxel/voxelCommon.hlsli.cso COMMENT "Clearing stale voxel shader cache (forces recompilation from current .hlsl sources)" ) diff --git a/shaders/voxelBLASExtractCS.hlsl b/shaders/voxelBLASExtractCS.hlsl new file mode 100644 index 0000000..924b6dc --- /dev/null +++ b/shaders/voxelBLASExtractCS.hlsl @@ -0,0 +1,119 @@ +// BVLE Voxels - BLAS Position Extraction Compute Shader (Phase 6.1) +// Reads GPU-generated PackedQuads and writes flat float3 positions +// suitable for DXR BLAS construction (non-indexed, 6 vertices per quad). +// +// Uses the exact same unpack + winding logic as voxelVS.hlsl. +// Output is RWByteAddressBuffer (raw buffer) for BLAS vertex compatibility. + +#include "voxelCommon.hlsli" + +struct PackedQuad { + uint2 data; // 8 bytes = 2 x uint32 +}; + +StructuredBuffer quadBuffer : register(t0); +StructuredBuffer chunkInfoBuffer : register(t2); + +// Output: raw float3 positions (12 bytes each), 6 per quad +RWByteAddressBuffer blasPositions : register(u0); + +// Push constants (b999) +struct BLASPush { + uint quadCount; + uint pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10; +}; +[[vk::push_constant]] ConstantBuffer push : register(b999); + +// ── Face direction tables (SAME as voxelVS.hlsl) ─────────────────── +static const float3 faceNormals[6] = { + float3( 1, 0, 0), float3(-1, 0, 0), + float3( 0, 1, 0), float3( 0,-1, 0), + float3( 0, 0, 1), float3( 0, 0,-1) +}; + +static const float3 faceU[6] = { + float3(0, 1, 0), float3(0, 1, 0), + float3(1, 0, 0), float3(1, 0, 0), + float3(1, 0, 0), float3(1, 0, 0) +}; + +static const float3 faceV[6] = { + float3(0, 0, 1), float3(0, 0, 1), + float3(0, 0, 1), float3(0, 0, 1), + float3(0, 1, 0), float3(0, 1, 0) +}; + +// Helper: store float3 at byte offset in raw buffer +void storeFloat3(uint byteOffset, float3 v) { + blasPositions.Store(byteOffset, asuint(v.x)); + blasPositions.Store(byteOffset + 4, asuint(v.y)); + blasPositions.Store(byteOffset + 8, asuint(v.z)); +} + +// ── Quad unpacking (SAME as voxelVS.hlsl + chunkIndex from GPU mesh bits) ── +void unpackQuad(uint2 raw, out uint px, out uint py, out uint pz, + out uint w, out uint h, out uint face, out uint chunkIdx) +{ + uint lo = raw.x; + uint hi = raw.y; + px = lo & 0x3F; + py = (lo >> 6) & 0x3F; + pz = (lo >> 12) & 0x3F; + w = (lo >> 18) & 0x3F; + h = (lo >> 24) & 0x3F; + face = ((lo >> 30) & 0x3) | ((hi & 0x1) << 2); + // GPU mesh path: chunkIndex in bits [27:17] of hi word + chunkIdx = (hi >> 17) & 0x7FF; +} + +[RootSignature(VOXEL_ROOTSIG)] +[numthreads(64, 1, 1)] +void main(uint3 DTid : SV_DispatchThreadID) { + uint quadIdx = DTid.x; + if (quadIdx >= push.quadCount) return; + + PackedQuad packed = quadBuffer[quadIdx]; + uint px, py, pz, w, h, face, chunkIdx; + unpackQuad(packed.data, px, py, pz, w, h, face, chunkIdx); + + GPUChunkInfo info = chunkInfoBuffer[chunkIdx]; + + // ── Compute 4 corner world positions (same math as voxelVS.hlsl) ── + float3 basePos = float3((float)px, (float)py, (float)pz); + float3 normal = faceNormals[face]; + float3 uAxis = faceU[face]; + float3 vAxis = faceV[face]; + + // Positive faces: offset by 1 in normal direction + float3 faceOffset = (face % 2 == 0) ? normal : float3(0, 0, 0); + + float3 origin = basePos + faceOffset + info.worldPos.xyz; + float3 p00 = origin; + float3 p10 = origin + uAxis * (float)w; + float3 p01 = origin + vAxis * (float)h; + float3 p11 = origin + uAxis * (float)w + vAxis * (float)h; + + // ── Winding: must match voxelVS.hlsl ── + // CW for faces 0,3,4 ; CCW for faces 1,2,5 + bool useCCW = (face == 1 || face == 2 || face == 5); + + // 6 vertices × 12 bytes (float3) = 72 bytes per quad + uint byteBase = quadIdx * 72; + if (useCCW) { + // CCW: (0,0)(1,0)(0,1), (0,1)(1,0)(1,1) + storeFloat3(byteBase + 0, p00); + storeFloat3(byteBase + 12, p10); + storeFloat3(byteBase + 24, p01); + storeFloat3(byteBase + 36, p01); + storeFloat3(byteBase + 48, p10); + storeFloat3(byteBase + 60, p11); + } else { + // CW: (0,0)(0,1)(1,0), (1,0)(0,1)(1,1) + storeFloat3(byteBase + 0, p00); + storeFloat3(byteBase + 12, p01); + storeFloat3(byteBase + 24, p10); + storeFloat3(byteBase + 36, p10); + storeFloat3(byteBase + 48, p01); + storeFloat3(byteBase + 60, p11); + } +} diff --git a/shaders/voxelPS.hlsl b/shaders/voxelPS.hlsl index 521c5c3..699846b 100644 --- a/shaders/voxelPS.hlsl +++ b/shaders/voxelPS.hlsl @@ -130,11 +130,19 @@ static const float3 faceDebugColors[6] = { float3(0.0, 0.0, 0.5), // 5: -Z = DARK BLUE }; +// ── MRT Output ───────────────────────────────────────────────────── +struct PSOutput { + float4 color : SV_TARGET0; + float4 normal : SV_TARGET1; +}; + // ── Main PS ──────────────────────────────────────────────────────── [RootSignature(VOXEL_ROOTSIG)] -float4 main(PSInput input) : SV_TARGET0 +PSOutput main(PSInput input) { + PSOutput output; + // ── DEBUG MODE: face direction colors ── if (input.debugFlag > 0.5) { @@ -143,7 +151,9 @@ float4 main(PSInput input) : SV_TARGET0 float2 checker = floor(input.worldPos.xz * 0.5); float check = frac((checker.x + checker.y) * 0.5) * 2.0; faceColor *= (0.85 + 0.15 * check); - return float4(faceColor, 1.0); + output.color = float4(faceColor, 1.0); + output.normal = float4(normalize(input.normal), 0.0); + return output; } // ── NORMAL MODE: triplanar textured with height-based blending ── @@ -219,12 +229,16 @@ float4 main(PSInput input) : SV_TARGET0 float3 debugColor = float3(0.3, 0.3, 0.3); // gray = no blend uint selfMat = readVoxelMat(voxelCoord, input.chunkIndex); if (selfMat != input.materialID) { - return float4(1, 0, 0, 1); // RED = data mismatch bug + output.color = float4(1, 0, 0, 1); // RED = data mismatch bug + output.normal = float4(N, 0.0); + return output; } if (uBlend) debugColor.r = uWeight * 2.0; if (vBlend) debugColor.b = vWeight * 2.0; if (!uBlend && !vBlend) debugColor.g = 0.5; - return float4(debugColor, 1.0); + output.color = float4(debugColor, 1.0); + output.normal = float4(N, 0.0); + return output; } if (uBlend || vBlend) { @@ -289,5 +303,7 @@ float4 main(PSInput input) : SV_TARGET0 float3 fogColor = float3(0.55, 0.70, 0.90); color = lerp(color, fogColor, saturate(fog)); - return float4(color, 1.0); + output.color = float4(color, 1.0); + output.normal = float4(N, 0.0); + return output; } diff --git a/shaders/voxelSmoothPS.hlsl b/shaders/voxelSmoothPS.hlsl index eddafcc..85da08b 100644 --- a/shaders/voxelSmoothPS.hlsl +++ b/shaders/voxelSmoothPS.hlsl @@ -90,9 +90,16 @@ float4 sampleTriplanarRGBA(float3 wp, float3 n, uint texIdx, float tiling) { return cx * w.x + cy * w.y + cz * w.z; } +// ── MRT Output ────────────────────────────────────────────────── +struct PSOutput { + float4 color : SV_TARGET0; + float4 normal : SV_TARGET1; +}; + // ── Main PS ────────────────────────────────────────────────────── [RootSignature(VOXEL_ROOTSIG)] -float4 main(PSInput input) : SV_TARGET0 { +PSOutput main(PSInput input) { + PSOutput output; float3 N = normalize(input.normal); // smooth normal (for lighting) // Geometric normal from screen-space derivatives of worldPos. @@ -219,5 +226,7 @@ float4 main(PSInput input) : SV_TARGET0 { float3 fogColor = float3(0.55, 0.70, 0.90); color = lerp(color, fogColor, saturate(fog)); - return float4(color, 1.0); + output.color = float4(color, 1.0); + output.normal = float4(N, 0.0); + return output; } diff --git a/shaders/voxelTopingPS.hlsl b/shaders/voxelTopingPS.hlsl index f457f33..a6654d1 100644 --- a/shaders/voxelTopingPS.hlsl +++ b/shaders/voxelTopingPS.hlsl @@ -14,8 +14,14 @@ struct PSInput { nointerpolation uint materialID : MATERIALID; }; +struct PSOutput { + float4 color : SV_TARGET0; + float4 normal : SV_TARGET1; +}; + [RootSignature(VOXEL_ROOTSIG)] -float4 main(PSInput input) : SV_TARGET0 { +PSOutput main(PSInput input) { + PSOutput output; float3 N = normalize(input.normal); float tiling = textureTiling; @@ -72,5 +78,7 @@ float4 main(PSInput input) : SV_TARGET0 { lit = texColor * (diffuse + ambient); } - return float4(lit, 1.0); + output.color = float4(lit, 1.0); + output.normal = float4(N, 0.0); + return output; } diff --git a/src/voxel/VoxelRenderer.cpp b/src/voxel/VoxelRenderer.cpp index ea23494..8af641b 100644 --- a/src/voxel/VoxelRenderer.cpp +++ b/src/voxel/VoxelRenderer.cpp @@ -156,6 +156,36 @@ void VoxelRenderer::initialize(GraphicsDevice* dev) { wi::backlog::post("VoxelRenderer: GPU smooth mesher available (2-pass with smooth normals)"); } + // ── Ray Tracing (Phase 6.1) ──────────────────────────────────── + rtAvailable_ = device_->CheckCapability(GraphicsDeviceCapability::RAYTRACING); + if (rtAvailable_) { + wi::renderer::LoadShader(ShaderStage::CS, blasExtractShader_, "voxel/voxelBLASExtractCS.cso"); + if (blasExtractShader_.IsValid()) { + // BLAS position buffer: 6 float3 per quad (non-indexed triangles) + // Use BUFFER_RAW (ByteAddressBuffer) — structured buffers may not work as BLAS vertex input + GPUBufferDesc posDesc; + posDesc.size = (uint64_t)MAX_BLAS_VERTICES * sizeof(float) * 3; // float3 per vertex + posDesc.bind_flags = BindFlag::UNORDERED_ACCESS | BindFlag::SHADER_RESOURCE; + posDesc.misc_flags = ResourceMiscFlag::BUFFER_RAW; + posDesc.stride = 0; // raw buffer, no stride + posDesc.usage = Usage::DEFAULT; + bool ok = device_->CreateBuffer(&posDesc, nullptr, &blasPositionBuffer_); + if (ok && blasPositionBuffer_.IsValid()) { + device_->SetName(&blasPositionBuffer_, "VoxelRenderer::blasPositionBuffer"); + wi::backlog::post("VoxelRenderer: RT available (BLAS position buffer " + + std::to_string(posDesc.size / (1024*1024)) + " MB)"); + } else { + rtAvailable_ = false; + wi::backlog::post("VoxelRenderer: RT buffer creation failed", wi::backlog::LogLevel::Warning); + } + } else { + rtAvailable_ = false; + wi::backlog::post("VoxelRenderer: RT available but BLAS extraction shader failed", wi::backlog::LogLevel::Warning); + } + } else { + wi::backlog::post("VoxelRenderer: RT not available (GPU does not support ray tracing)"); + } + cpuMegaQuads_.reserve(MEGA_BUFFER_CAPACITY); cpuChunkInfo_.reserve(MAX_CHUNKS); chunkSlots_.reserve(MAX_CHUNKS); @@ -901,6 +931,230 @@ void VoxelRenderer::dispatchGpuSmoothMesh(CommandList cmd, const VoxelWorld& wor gpuSmoothMeshDirty_ = false; } +// ── Ray Tracing: BLAS extraction + AS build (Phase 6.1) ────────── + +void VoxelRenderer::dispatchBLASExtract(CommandList cmd) const { + if (!rtAvailable_ || !blasExtractShader_.IsValid()) return; + + auto* dev = device_; + uint32_t quadCount = gpuMeshQuadCount_; + if (quadCount == 0) return; + + // Pre-barriers: blasPositionBuffer_ UNDEFINED → UAV + GPUBarrier preBarriers[] = { + GPUBarrier::Buffer(&blasPositionBuffer_, + ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS), + }; + dev->Barrier(preBarriers, 1, cmd); + + // Bind compute shader + dev->BindComputeShader(&blasExtractShader_, cmd); + + // Bind resources: t0 = gpuQuadBuffer (SRV), t2 = chunkInfoBuffer (SRV), u0 = blasPositionBuffer (UAV) + dev->BindResource(&gpuQuadBuffer_, 0, cmd); // t0 + dev->BindResource(&chunkInfoBuffer_, 2, cmd); // t2 + dev->BindUAV(&blasPositionBuffer_, 0, cmd); // u0 + + // Push constants: quadCount + struct BLASPush { + uint32_t quadCount; + uint32_t pad[11]; + } pushData = {}; + pushData.quadCount = quadCount; + dev->PushConstants(&pushData, sizeof(pushData), cmd); + + // Dispatch: 64 threads per group + uint32_t groupCount = (quadCount + 63) / 64; + dev->Dispatch(groupCount, 1, 1, cmd); + + // Post-barrier: blasPositionBuffer_ UAV → SHADER_RESOURCE (for BLAS build) + GPUBarrier postBarriers[] = { + GPUBarrier::Buffer(&blasPositionBuffer_, + ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE), + }; + dev->Barrier(postBarriers, 1, cmd); + + rtBlockyVertexCount_ = quadCount * 6; +} + +void VoxelRenderer::buildAccelerationStructures(CommandList cmd) const { + if (!rtAvailable_) return; + + auto* dev = device_; + + // ── Blocky BLAS ────────────────────────────────────────────── + uint32_t blockyVertCount = rtBlockyVertexCount_; + if (blockyVertCount < 3) blockyVertCount = 0; // Need at least 1 triangle + if (blockyVertCount > 0 && blasPositionBuffer_.IsValid()) { + // (Re)create BLAS if needed (vertex count changed or first time) + if (!blockyBLAS_.IsValid() || blockyBLAS_.desc.bottom_level.geometries.empty() || + blockyBLAS_.desc.bottom_level.geometries[0].triangles.vertex_count != blockyVertCount) { + + RaytracingAccelerationStructureDesc desc; + desc.type = RaytracingAccelerationStructureDesc::Type::BOTTOMLEVEL; + desc.flags = RaytracingAccelerationStructureDesc::FLAG_PREFER_FAST_BUILD; + + desc.bottom_level.geometries.resize(1); + auto& geom = desc.bottom_level.geometries[0]; + geom.type = RaytracingAccelerationStructureDesc::BottomLevel::Geometry::Type::TRIANGLES; + geom.flags = RaytracingAccelerationStructureDesc::BottomLevel::Geometry::FLAG_OPAQUE; + geom.triangles.vertex_buffer = blasPositionBuffer_; + geom.triangles.vertex_byte_offset = 0; + geom.triangles.vertex_count = blockyVertCount; + geom.triangles.vertex_stride = sizeof(float) * 3; // 12 bytes per float3 + geom.triangles.vertex_format = Format::R32G32B32_FLOAT; + // Wicked ALWAYS accesses index_buffer via to_internal() even for non-indexed. + // Provide a valid buffer with index_count=0 to prevent null deref crash. + geom.triangles.index_buffer = blasPositionBuffer_; // dummy, won't be used + geom.triangles.index_count = 0; + + bool ok = dev->CreateRaytracingAccelerationStructure(&desc, + &blockyBLAS_); + if (ok) { + dev->SetName(&blockyBLAS_, "VoxelRenderer::blockyBLAS"); + wi::backlog::post("VoxelRenderer: blocky BLAS created (" + + std::to_string(blockyVertCount / 3) + " tris)"); + } else { + wi::backlog::post("VoxelRenderer: failed to create blocky BLAS", wi::backlog::LogLevel::Error); + rtAvailable_ = false; + return; + } + } + + // Build BLAS + dev->BuildRaytracingAccelerationStructure(&blockyBLAS_, cmd, nullptr); + } + + // ── Smooth BLAS ────────────────────────────────────────────── + // Smooth vertex buffer: float3 position at offset 0, stride 32 bytes + uint32_t smoothVertCount = gpuSmoothVertexCount_; + if (smoothVertCount < 3) smoothVertCount = 0; // Need at least 1 triangle + bool useGpuSmooth = smoothCentroidShader_.IsValid() && smoothMeshShader_.IsValid(); + const GPUBuffer& smoothVB = useGpuSmooth ? gpuSmoothVertexBuffer_ : smoothVertexBuffer_; + + if (smoothVertCount > 0 && smoothVB.IsValid()) { + if (!smoothBLAS_.IsValid() || smoothBLAS_.desc.bottom_level.geometries.empty() || + smoothBLAS_.desc.bottom_level.geometries[0].triangles.vertex_count != smoothVertCount) { + + RaytracingAccelerationStructureDesc desc; + desc.type = RaytracingAccelerationStructureDesc::Type::BOTTOMLEVEL; + desc.flags = RaytracingAccelerationStructureDesc::FLAG_PREFER_FAST_BUILD; + + desc.bottom_level.geometries.resize(1); + auto& geom = desc.bottom_level.geometries[0]; + geom.type = RaytracingAccelerationStructureDesc::BottomLevel::Geometry::Type::TRIANGLES; + geom.flags = RaytracingAccelerationStructureDesc::BottomLevel::Geometry::FLAG_OPAQUE; + geom.triangles.vertex_buffer = smoothVB; + geom.triangles.vertex_byte_offset = 0; + geom.triangles.vertex_count = smoothVertCount; + geom.triangles.vertex_stride = 32; // SmoothVtx struct = 32 bytes, position at offset 0 + // Wicked always accesses index_buffer (null deref if invalid) + geom.triangles.index_buffer = smoothVB; // dummy, won't be used + geom.triangles.index_count = 0; + geom.triangles.vertex_format = Format::R32G32B32_FLOAT; + + bool ok = dev->CreateRaytracingAccelerationStructure(&desc, + &smoothBLAS_); + if (ok) { + dev->SetName(&smoothBLAS_, "VoxelRenderer::smoothBLAS"); + wi::backlog::post("VoxelRenderer: smooth BLAS created (" + + std::to_string(smoothVertCount / 3) + " tris)"); + } else { + wi::backlog::post("VoxelRenderer: failed to create smooth BLAS", wi::backlog::LogLevel::Error); + } + } + + if (smoothBLAS_.IsValid()) { + dev->BuildRaytracingAccelerationStructure(&smoothBLAS_, cmd, nullptr); + } + + rtSmoothVertexCount_ = smoothVertCount; + } + + // ── TLAS (2 instances: blocky + smooth) ────────────────────── + // Always recreate TLAS with pre-filled instance data via CreateBuffer2. + // RAY_TRACING instance buffers have special resource state requirements, + // so UpdateBuffer (CopyBufferRegion) would crash on state mismatch. + uint32_t instanceCount = 0; + if (blockyBLAS_.IsValid()) instanceCount++; + if (smoothBLAS_.IsValid() && smoothVertCount > 0) instanceCount++; + if (instanceCount == 0) { rtDirty_ = false; return; } + + const size_t instSize = dev->GetTopLevelAccelerationStructureInstanceSize(); + + // Identity transform (3x4 row-major) + auto setIdentity = [](float transform[3][4]) { + std::memset(transform, 0, sizeof(float) * 12); + transform[0][0] = 1.0f; + transform[1][1] = 1.0f; + transform[2][2] = 1.0f; + }; + + // Capture BLAS pointers for the lambda (can't capture member references) + const RaytracingAccelerationStructure* blockyBLASPtr = blockyBLAS_.IsValid() ? &blockyBLAS_ : nullptr; + const RaytracingAccelerationStructure* smoothBLASPtr = (smoothBLAS_.IsValid() && smoothVertCount > 0) ? &smoothBLAS_ : nullptr; + + // Create TLAS with instance data pre-filled in the creation callback. + // This avoids any UpdateBuffer on RAY_TRACING flagged buffers. + RaytracingAccelerationStructureDesc desc; + desc.flags = RaytracingAccelerationStructureDesc::FLAG_PREFER_FAST_BUILD; + desc.type = RaytracingAccelerationStructureDesc::Type::TOPLEVEL; + desc.top_level.count = instanceCount; + + GPUBufferDesc bufdesc; + bufdesc.misc_flags = ResourceMiscFlag::RAY_TRACING; + bufdesc.stride = (uint32_t)instSize; + bufdesc.size = bufdesc.stride * desc.top_level.count; + + auto initInstances = [&](void* dest) { + uint32_t idx = 0; + + if (blockyBLASPtr) { + RaytracingAccelerationStructureDesc::TopLevel::Instance inst; + setIdentity(inst.transform); + inst.instance_id = 0; + inst.instance_mask = 0xFF; + inst.instance_contribution_to_hit_group_index = 0; + inst.flags = 0; + inst.bottom_level = blockyBLASPtr; + dev->WriteTopLevelAccelerationStructureInstance(&inst, (uint8_t*)dest + idx * instSize); + idx++; + } + + if (smoothBLASPtr) { + RaytracingAccelerationStructureDesc::TopLevel::Instance inst; + setIdentity(inst.transform); + inst.instance_id = 1; + inst.instance_mask = 0xFF; + inst.instance_contribution_to_hit_group_index = 0; + inst.flags = 0; + inst.bottom_level = smoothBLASPtr; + dev->WriteTopLevelAccelerationStructureInstance(&inst, (uint8_t*)dest + idx * instSize); + idx++; + } + }; + + bool ok = dev->CreateBuffer2(&bufdesc, initInstances, &desc.top_level.instance_buffer); + if (!ok) { + wi::backlog::post("VoxelRenderer: failed to create TLAS instance buffer", wi::backlog::LogLevel::Error); + rtDirty_ = false; + return; + } + + ok = dev->CreateRaytracingAccelerationStructure(&desc, + &tlas_); + if (!ok) { + wi::backlog::post("VoxelRenderer: failed to create TLAS", wi::backlog::LogLevel::Error); + rtDirty_ = false; + return; + } + + // Build TLAS + dev->BuildRaytracingAccelerationStructure(&tlas_, cmd, nullptr); + + rtDirty_ = false; +} + // ── Frustum plane extraction (Gribb-Hartmann method) ──────────── static void extractFrustumPlanes(const XMMATRIX& vp, XMFLOAT4 planes[6]) { XMFLOAT4X4 m; @@ -939,7 +1193,8 @@ void VoxelRenderer::render( CommandList cmd, const wi::scene::CameraComponent& camera, const Texture& depthBuffer, - const Texture& renderTarget + const Texture& renderTarget, + const Texture& normalTarget ) const { if (!initialized_ || chunkCount_ == 0 || !pso_.IsValid()) return; @@ -975,7 +1230,7 @@ void VoxelRenderer::render( cb.windTime = windTime_; dev->UpdateBuffer(&constantBuffer_, &cb, cmd, sizeof(cb)); - // Render pass + // Render pass (MRT: color + normals + depth) RenderPassImage rp[] = { RenderPassImage::RenderTarget( &renderTarget, @@ -984,6 +1239,13 @@ void VoxelRenderer::render( ResourceState::SHADER_RESOURCE, ResourceState::SHADER_RESOURCE ), + RenderPassImage::RenderTarget( + &normalTarget, + RenderPassImage::LoadOp::CLEAR, + RenderPassImage::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::SHADER_RESOURCE + ), RenderPassImage::DepthStencil( &depthBuffer, RenderPassImage::LoadOp::CLEAR, @@ -993,7 +1255,7 @@ void VoxelRenderer::render( ResourceState::DEPTHSTENCIL ), }; - dev->RenderPassBegin(rp, 2, cmd); + dev->RenderPassBegin(rp, 3, cmd); Viewport vp; vp.width = (float)renderTarget.GetDesc().width; @@ -1115,7 +1377,7 @@ void VoxelRenderer::render( }; dev->Barrier(postBarriers, 2, cmd); - // ── Render pass ──────────────────────────────────────────── + // ── Render pass (MRT: color + normals + depth) ────────────── RenderPassImage rp[] = { RenderPassImage::RenderTarget( &renderTarget, @@ -1124,6 +1386,13 @@ void VoxelRenderer::render( ResourceState::SHADER_RESOURCE, ResourceState::SHADER_RESOURCE ), + RenderPassImage::RenderTarget( + &normalTarget, + RenderPassImage::LoadOp::CLEAR, + RenderPassImage::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::SHADER_RESOURCE + ), RenderPassImage::DepthStencil( &depthBuffer, RenderPassImage::LoadOp::CLEAR, @@ -1133,7 +1402,7 @@ void VoxelRenderer::render( ResourceState::DEPTHSTENCIL ), }; - dev->RenderPassBegin(rp, 2, cmd); + dev->RenderPassBegin(rp, 3, cmd); Viewport vp; vp.width = (float)renderTarget.GetDesc().width; @@ -1268,7 +1537,7 @@ void VoxelRenderer::render( } dev->UpdateBuffer(&drawCountBuffer_, &cpuDrawCount, cmd, sizeof(uint32_t)); - // ── Render pass ──────────────────────────────────────────── + // ── Render pass (MRT: color + normals + depth) ────────────── RenderPassImage rp[] = { RenderPassImage::RenderTarget( &renderTarget, @@ -1277,6 +1546,13 @@ void VoxelRenderer::render( ResourceState::SHADER_RESOURCE, ResourceState::SHADER_RESOURCE ), + RenderPassImage::RenderTarget( + &normalTarget, + RenderPassImage::LoadOp::CLEAR, + RenderPassImage::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::SHADER_RESOURCE + ), RenderPassImage::DepthStencil( &depthBuffer, RenderPassImage::LoadOp::CLEAR, @@ -1286,7 +1562,7 @@ void VoxelRenderer::render( ResourceState::DEPTHSTENCIL ), }; - dev->RenderPassBegin(rp, 2, cmd); + dev->RenderPassBegin(rp, 3, cmd); Viewport vp; vp.width = (float)renderTarget.GetDesc().width; @@ -1330,6 +1606,13 @@ void VoxelRenderer::render( ResourceState::SHADER_RESOURCE, ResourceState::SHADER_RESOURCE ), + RenderPassImage::RenderTarget( + &normalTarget, + RenderPassImage::LoadOp::CLEAR, + RenderPassImage::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::SHADER_RESOURCE + ), RenderPassImage::DepthStencil( &depthBuffer, RenderPassImage::LoadOp::CLEAR, @@ -1339,7 +1622,7 @@ void VoxelRenderer::render( ResourceState::DEPTHSTENCIL ), }; - dev->RenderPassBegin(rp, 2, cmd); + dev->RenderPassBegin(rp, 3, cmd); Viewport vp; vp.width = (float)renderTarget.GetDesc().width; @@ -1467,7 +1750,8 @@ void VoxelRenderer::renderTopings( CommandList cmd, const TopingSystem& topingSystem, const Texture& depthBuffer, - const Texture& renderTarget + const Texture& renderTarget, + const Texture& normalTarget ) const { if (!topingPso_.IsValid() || !topingVertexBuffer_.IsValid() || !topingInstanceBuffer_.IsValid()) return; @@ -1487,6 +1771,13 @@ void VoxelRenderer::renderTopings( ResourceState::SHADER_RESOURCE, ResourceState::SHADER_RESOURCE ), + RenderPassImage::RenderTarget( + &normalTarget, + RenderPassImage::LoadOp::LOAD, + RenderPassImage::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::SHADER_RESOURCE + ), RenderPassImage::DepthStencil( &depthBuffer, RenderPassImage::LoadOp::LOAD, @@ -1496,7 +1787,7 @@ void VoxelRenderer::renderTopings( ResourceState::DEPTHSTENCIL ), }; - dev->RenderPassBegin(rp, 2, cmd); + dev->RenderPassBegin(rp, 3, cmd); // Viewport & scissor Viewport vp; @@ -1658,7 +1949,8 @@ void VoxelRenderer::uploadSmoothDataFast(VoxelWorld& world) { void VoxelRenderer::renderSmooth( CommandList cmd, const Texture& depthBuffer, - const Texture& renderTarget + const Texture& renderTarget, + const Texture& normalTarget ) const { // Use GPU-generated smooth buffer if available, otherwise CPU buffer const bool useGpuSmooth = smoothCentroidShader_.IsValid() && smoothMeshShader_.IsValid(); @@ -1678,6 +1970,13 @@ void VoxelRenderer::renderSmooth( ResourceState::SHADER_RESOURCE, ResourceState::SHADER_RESOURCE ), + RenderPassImage::RenderTarget( + &normalTarget, + RenderPassImage::LoadOp::LOAD, + RenderPassImage::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::SHADER_RESOURCE + ), RenderPassImage::DepthStencil( &depthBuffer, RenderPassImage::LoadOp::LOAD, @@ -1687,7 +1986,7 @@ void VoxelRenderer::renderSmooth( ResourceState::DEPTHSTENCIL ), }; - dev->RenderPassBegin(rp, 2, cmd); + dev->RenderPassBegin(rp, 3, cmd); // Viewport & scissor Viewport vp; @@ -1830,6 +2129,18 @@ void VoxelRenderPath::createRenderTargets() { rtDesc.layout = wi::graphics::ResourceState::SHADER_RESOURCE; device->CreateTexture(&rtDesc, nullptr, &voxelRT_); + // Normal render target (world-space normals for RT shadows/AO) + wi::graphics::TextureDesc normalDesc; + normalDesc.type = wi::graphics::TextureDesc::Type::TEXTURE_2D; + normalDesc.width = w; + normalDesc.height = h; + normalDesc.format = wi::graphics::Format::R16G16B16A16_SNORM; + normalDesc.bind_flags = wi::graphics::BindFlag::RENDER_TARGET | wi::graphics::BindFlag::SHADER_RESOURCE; + normalDesc.mip_levels = 1; + normalDesc.sample_count = 1; + normalDesc.layout = wi::graphics::ResourceState::SHADER_RESOURCE; + device->CreateTexture(&normalDesc, nullptr, &voxelNormalRT_); + wi::graphics::TextureDesc depthDesc; depthDesc.type = wi::graphics::TextureDesc::Type::TEXTURE_2D; depthDesc.width = w; @@ -1841,7 +2152,7 @@ void VoxelRenderPath::createRenderTargets() { depthDesc.layout = wi::graphics::ResourceState::DEPTHSTENCIL; device->CreateTexture(&depthDesc, nullptr, &voxelDepth_); - rtCreated_ = voxelRT_.IsValid() && voxelDepth_.IsValid(); + rtCreated_ = voxelRT_.IsValid() && voxelNormalRT_.IsValid() && voxelDepth_.IsValid(); wi::backlog::post("VoxelRenderPath: render targets " + std::string(rtCreated_ ? "OK" : "FAILED") + " (" + std::to_string(w) + "x" + std::to_string(h) + ")"); } @@ -2031,6 +2342,14 @@ void VoxelRenderPath::Render() const { renderer.smoothCentroidShader_.IsValid() && renderer.smoothMeshShader_.IsValid()) { renderer.gpuSmoothMeshDirty_ = true; } + + // Phase 6.1: BLAS extraction + acceleration structure build + if (renderer.rtAvailable_ && renderer.blasExtractShader_.IsValid() && + renderer.gpuMeshQuadCount_ > 0 && + (renderer.rtDirty_ || renderer.gpuMeshQuadCount_ != renderer.rtBlockyVertexCount_ / 6)) { + renderer.dispatchBLASExtract(cmd); + renderer.buildAccelerationStructures(cmd); + } } // GPU mesh benchmark state machine (runs once after world gen, CPU path only) @@ -2043,13 +2362,13 @@ void VoxelRenderPath::Render() const { } auto tRender0 = std::chrono::high_resolution_clock::now(); - renderer.render(cmd, *camera, voxelDepth_, voxelRT_); + renderer.render(cmd, *camera, voxelDepth_, voxelRT_, voxelNormalRT_); // Phase 4: render topings (separate render pass, preserves voxel output) - renderer.renderTopings(cmd, topingSystem, voxelDepth_, voxelRT_); + renderer.renderTopings(cmd, topingSystem, voxelDepth_, voxelRT_, voxelNormalRT_); // Phase 5: render smooth surfaces (separate render pass, preserves all prior output) - renderer.renderSmooth(cmd, voxelDepth_, voxelRT_); + renderer.renderSmooth(cmd, voxelDepth_, voxelRT_, voxelNormalRT_); auto tRender1 = std::chrono::high_resolution_clock::now(); profRender_.add(std::chrono::duration(tRender1 - tRender0).count()); } @@ -2121,7 +2440,7 @@ void VoxelRenderPath::Compose(CommandList cmd) const { char dtStr[16]; snprintf(dtStr, sizeof(dtStr), "%.2f", lastDt_ * 1000.0f); - std::string stats = "BVLE Voxel Engine (Phase 5 — Smooth Surfaces)\n"; + std::string stats = "BVLE Voxel Engine (Phase 6 — Ray Tracing)\n"; stats += "FPS: " + std::string(fpsStr) + " (" + std::string(dtStr) + " ms)\n"; if (debugMode) { stats += "=== DEBUG FACE MODE ===\n"; @@ -2158,6 +2477,17 @@ void VoxelRenderPath::Compose(CommandList cmd) const { + " verts (" + std::to_string(renderer.getSmoothVertexCount() / 3) + " tris), " + std::to_string(renderer.getSmoothDrawCalls()) + " draws\n"; } + if (renderer.isRTAvailable()) { + if (renderer.isRTReady()) { + stats += "RT: TLAS ready | Blocky " + + std::to_string(renderer.getRTBlockyTriCount()) + " tris | Smooth " + + std::to_string(renderer.getRTSmoothTriCount()) + " tris\n"; + } else { + stats += "RT: building...\n"; + } + } else { + stats += "RT: not available\n"; + } stats += "WASD+Space/Ctrl: move | Shift: fast | Right-click: capture mouse\n"; stats += "F2: console | F3: anim [" + std::string(animatedTerrain_ ? "ON" : "OFF") + "] | F4: dbg [" + std::string(renderer.debugBlend_ ? "ON" : "OFF") + "]"; diff --git a/src/voxel/VoxelRenderer.h b/src/voxel/VoxelRenderer.h index 1368ab7..27177a4 100644 --- a/src/voxel/VoxelRenderer.h +++ b/src/voxel/VoxelRenderer.h @@ -45,7 +45,8 @@ public: wi::graphics::CommandList cmd, const wi::scene::CameraComponent& camera, const wi::graphics::Texture& depthBuffer, - const wi::graphics::Texture& renderTarget + const wi::graphics::Texture& renderTarget, + const wi::graphics::Texture& normalTarget ) const; // Generate procedural textures for materials @@ -188,6 +189,21 @@ private: mutable uint32_t gpuSmoothVertexCount_ = 0; // readback from previous frame mutable bool gpuSmoothMeshDirty_ = true; + // ── Ray Tracing (Phase 6.1) ───────────────────────────────────── + wi::graphics::Shader blasExtractShader_; // voxelBLASExtractCS compute shader + mutable wi::graphics::GPUBuffer blasPositionBuffer_; // float3[] for blocky BLAS (6 verts per quad) + mutable wi::graphics::RaytracingAccelerationStructure blockyBLAS_; + mutable wi::graphics::RaytracingAccelerationStructure smoothBLAS_; + mutable wi::graphics::RaytracingAccelerationStructure tlas_; + static constexpr uint32_t MAX_BLAS_VERTICES = MEGA_BUFFER_CAPACITY * 6; // 6 verts per quad + mutable bool rtAvailable_ = false; // GPU supports RT + mutable bool rtDirty_ = true; // BLAS/TLAS need rebuild + mutable uint32_t rtBlockyVertexCount_ = 0; // current blocky BLAS vertex count + mutable uint32_t rtSmoothVertexCount_ = 0; // current smooth BLAS vertex count + + void dispatchBLASExtract(wi::graphics::CommandList cmd) const; + void buildAccelerationStructures(wi::graphics::CommandList cmd) const; + // Benchmark state machine: runs once after world gen enum class BenchState { IDLE, DISPATCH, READBACK, DONE }; mutable BenchState benchState_ = BenchState::IDLE; @@ -235,7 +251,8 @@ public: wi::graphics::CommandList cmd, const TopingSystem& topingSystem, const wi::graphics::Texture& depthBuffer, - const wi::graphics::Texture& renderTarget + const wi::graphics::Texture& renderTarget, + const wi::graphics::Texture& normalTarget ) const; uint32_t getTopingDrawCalls() const { return topingDrawCalls_; } @@ -245,10 +262,18 @@ public: void renderSmooth( wi::graphics::CommandList cmd, const wi::graphics::Texture& depthBuffer, - const wi::graphics::Texture& renderTarget + const wi::graphics::Texture& renderTarget, + const wi::graphics::Texture& normalTarget ) const; uint32_t getSmoothVertexCount() const { return (smoothCentroidShader_.IsValid() && smoothMeshShader_.IsValid()) ? gpuSmoothVertexCount_ : smoothVertexCount_; } uint32_t getSmoothDrawCalls() const { return smoothDrawCalls_; } + + // Phase 6: Ray Tracing + bool isRTAvailable() const { return rtAvailable_; } + bool isRTReady() const { return rtAvailable_ && tlas_.IsValid(); } + uint32_t getRTBlockyTriCount() const { return rtBlockyVertexCount_ / 3; } + uint32_t getRTSmoothTriCount() const { return rtSmoothVertexCount_ / 3; } + const wi::graphics::RaytracingAccelerationStructure& getTLAS() const { return tlas_; } }; // ── Custom RenderPath that integrates voxel rendering ─────────── @@ -291,6 +316,7 @@ private: static constexpr float ANIM_INTERVAL = 1.0f / 60.0f; // ~16.7ms = 60 Hz wi::graphics::Texture voxelRT_; + wi::graphics::Texture voxelNormalRT_; // Phase 6: world-space normals for RT shadows/AO wi::graphics::Texture voxelDepth_; mutable bool rtCreated_ = false;