From 53df73e5e6e97d821398cca41f984aeba007cc6c Mon Sep 17 00:00:00 2001 From: Samuel Bouchet Date: Tue, 31 Mar 2026 08:53:37 +0200 Subject: [PATCH] fixes after Improving perfs --- shaders/voxelTopingVS.hlsl | 12 ++++--- src/voxel/VoxelRenderer.cpp | 70 ++++++++++++++++++++++++------------- src/voxel/VoxelWorld.cpp | 64 +++++++++++++++++++-------------- src/voxel/VoxelWorld.h | 5 +++ 4 files changed, 96 insertions(+), 55 deletions(-) diff --git a/shaders/voxelTopingVS.hlsl b/shaders/voxelTopingVS.hlsl index 338b8d6..9ecbfcf 100644 --- a/shaders/voxelTopingVS.hlsl +++ b/shaders/voxelTopingVS.hlsl @@ -50,13 +50,15 @@ VSOutput main(uint vertexID : SV_VertexID, uint instanceID : SV_InstanceID) { // Quadratic scaling: base stays anchored, tips sway the most. if (push.materialID != 3u) { // not stone float localHeight = vtx.position.y - 1.0; + float amplitude = 2.0; + float frequency = 1.4; if (localHeight > 0.0) { float heightFactor = localHeight * localHeight; // quadratic - float phase = worldPos.x * 1.8 + worldPos.z * 1.3 + windTime * 3.5; - float phase2 = worldPos.x * 0.7 - worldPos.z * 2.1 + windTime * 2.7; - float swayX = sin(phase) * 0.11 * heightFactor; - float swayZ = cos(phase2) * 0.08 * heightFactor; - float swayY = -abs(sin(phase * 0.7)) * 0.02 * heightFactor; // slight droop + float phase = worldPos.x * 1.8 + worldPos.z * 1.3 + windTime * 3.5 * frequency; + float phase2 = worldPos.x * 0.7 - worldPos.z * 2.1 + windTime * 2.7 * frequency; + float swayX = sin(phase) * 0.11 * heightFactor * amplitude; + float swayZ = cos(phase2) * 0.08 * heightFactor * amplitude; + float swayY = -abs(sin(phase * 0.7)) * 0.02 * heightFactor * amplitude; // slight droop worldPos.x += swayX; worldPos.y += swayY; worldPos.z += swayZ; diff --git a/src/voxel/VoxelRenderer.cpp b/src/voxel/VoxelRenderer.cpp index 8c0e18f..b49392c 100644 --- a/src/voxel/VoxelRenderer.cpp +++ b/src/voxel/VoxelRenderer.cpp @@ -2011,6 +2011,7 @@ void VoxelRenderPath::handleInput(float dt) { // Force full RT rebuild (including topings) when animation stops renderer.rtDirty_ = true; renderer.topingBLASDirty_ = true; + renderer.aoHistoryValid_ = false; // clear stale temporal AO from pre-animation renderer.rtShadowsEnabled_ = rtWasEnabled_; } wi::backlog::post(animatedTerrain_ ? "Animation: ON (30 Hz)" : "Animation: OFF"); @@ -2239,6 +2240,30 @@ void VoxelRenderPath::Render() const { renderer.gpuSmoothMeshDirty_ = true; } + // ── Deferred GPU uploads BEFORE compute dispatches that read them ── + // topingInstanceBuffer_ must be filled before dispatchTopingBLASExtract reads it (t5) + if (renderer.topingInstanceDirty_ && renderer.topingInstanceBuffer_.IsValid() && + !renderer.topingGpuInsts_.empty()) { + size_t uploadSize = renderer.topingGpuInsts_.size() * sizeof(VoxelRenderer::TopingGPUInst); + size_t bufferSize = renderer.topingInstanceCapacity_ * sizeof(VoxelRenderer::TopingGPUInst); + if (uploadSize <= bufferSize) { + device->UpdateBuffer(&renderer.topingInstanceBuffer_, + renderer.topingGpuInsts_.data(), cmd, uploadSize); + } + renderer.topingInstanceDirty_ = false; + } + if (renderer.smoothVertexDirty_ && renderer.smoothVertexBuffer_.IsValid() && + renderer.smoothVertexCount_ > 0 && + renderer.smoothVertexCount_ <= renderer.smoothStagingVerts_.size()) { + size_t uploadSize = renderer.smoothVertexCount_ * sizeof(SmoothVertex); + size_t bufferSize = renderer.smoothVertexCapacity_ * sizeof(SmoothVertex); + if (uploadSize <= bufferSize) { + device->UpdateBuffer(&renderer.smoothVertexBuffer_, + renderer.smoothStagingVerts_.data(), cmd, uploadSize); + } + renderer.smoothVertexDirty_ = false; + } + // ── GPU compute toping BLAS extraction ── // Skip during animation (toping BLAS is skipped to save ~130ms GPU) if (renderer.topingBLASDirty_ && renderer.topingBLASShader_.IsValid() && !animatedTerrain_) { @@ -2253,9 +2278,20 @@ void VoxelRenderPath::Render() const { // - Alternate blocky/smooth BLAS builds across animation frames // When not animating, rebuild all immediately. { + // Detect if new BLAS instances became available since last TLAS creation. + // Without this, the TLAS stays at 1-2 instances and never includes + // late-arriving smooth/toping BLASes (due to 1-frame readback delay). + uint32_t potentialInstances = 0; + if (renderer.gpuMeshQuadCount_ > 0) potentialInstances++; + if (renderer.gpuSmoothVertexCount_ >= 3) potentialInstances++; + if (renderer.rtTopingVertexCount_ >= 3) potentialInstances++; + bool tlasNeedsMoreInstances = potentialInstances > renderer.tlasInstanceCount_; + bool needsBuild = renderer.rtAvailable_ && renderer.blasExtractShader_.IsValid() && renderer.gpuMeshQuadCount_ > 0 && - (renderer.rtDirty_ || renderer.gpuMeshQuadCount_ != renderer.rtBlockyVertexCount_ / 6); + (renderer.rtDirty_ || + renderer.gpuMeshQuadCount_ != renderer.rtBlockyVertexCount_ / 6 || + tlasNeedsMoreInstances); if (needsBuild) { device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_BUILD_BEGIN, cmd); @@ -2277,29 +2313,6 @@ void VoxelRenderPath::Render() const { } } - // ── Deferred GPU uploads (dirty flags set in Update(), need CommandList) ── - if (renderer.topingInstanceDirty_ && renderer.topingInstanceBuffer_.IsValid() && - !renderer.topingGpuInsts_.empty()) { - size_t uploadSize = renderer.topingGpuInsts_.size() * sizeof(VoxelRenderer::TopingGPUInst); - size_t bufferSize = renderer.topingInstanceCapacity_ * sizeof(VoxelRenderer::TopingGPUInst); - if (uploadSize <= bufferSize) { - device->UpdateBuffer(&renderer.topingInstanceBuffer_, - renderer.topingGpuInsts_.data(), cmd, uploadSize); - } - renderer.topingInstanceDirty_ = false; - } - if (renderer.smoothVertexDirty_ && renderer.smoothVertexBuffer_.IsValid() && - renderer.smoothVertexCount_ > 0 && - renderer.smoothVertexCount_ <= renderer.smoothStagingVerts_.size()) { - size_t uploadSize = renderer.smoothVertexCount_ * sizeof(SmoothVertex); - size_t bufferSize = renderer.smoothVertexCapacity_ * sizeof(SmoothVertex); - if (uploadSize <= bufferSize) { - device->UpdateBuffer(&renderer.smoothVertexBuffer_, - renderer.smoothStagingVerts_.data(), cmd, uploadSize); - } - renderer.smoothVertexDirty_ = false; - } - // ── Draw passes ── device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_DRAW_BEGIN, cmd); renderer.render(cmd, *camera, voxelDepth_, voxelRT_, voxelNormalRT_); @@ -2429,6 +2442,15 @@ void VoxelRenderPath::Compose(CommandList cmd) const { wi::image::Params fx; fx.enableFullScreen(); fx.blendFlag = wi::enums::BLENDMODE_OPAQUE; + + // HDR support: when the swapchain is HDR, Compose() targets a linear float buffer + // (rendertargetPreHDR10). Our voxelRT_ contains tone-mapped sRGB values, so we must + // decode sRGB → linear and apply HDR scaling (same pattern as wiRenderPath2D). + // Without this, the HDR10 PQ curve double-transforms our already-gamma'd values. + if (colorspace != wi::graphics::ColorSpace::SRGB) { + fx.enableLinearOutputMapping(hdr_scaling); + } + wi::image::Draw(&voxelRT_, fx, cmd); } diff --git a/src/voxel/VoxelWorld.cpp b/src/voxel/VoxelWorld.cpp index 7726ee5..bd4048f 100644 --- a/src/voxel/VoxelWorld.cpp +++ b/src/voxel/VoxelWorld.cpp @@ -115,7 +115,7 @@ void VoxelWorld::generateChunk(Chunk& chunk, float timeOffset) { const float caveScale = 0.05f; const float caveThreshold = 0.3f; - // Animation mode: fewer octaves + skip caves (much faster for 20Hz regen) + // Animation mode: fewer octaves + skip caves + cached materials (much faster for 30Hz regen) const bool animating = (timeOffset != 0.0f); const int heightOctaves = animating ? 2 : 5; @@ -130,34 +130,46 @@ void VoxelWorld::generateChunk(Chunk& chunk, float timeOffset) { float height = baseHeight + heightScale * fbm(wx * scale, timeOffset, wz * scale, heightOctaves); // ── Surface material via noise-based patches ── - // Use 2D noise at different frequencies/seeds to create organic patches - // of each material on the surface, instead of altitude bands. - float matNoise1 = fbm(wx * 0.03f + 500.0f, 0.0f, wz * 0.03f + 500.0f, 3); // large patches - float matNoise2 = fbm(wx * 0.08f + 1000.0f, 0.0f, wz * 0.08f + 1000.0f, 2); // medium detail - float matNoise3 = fbm(wx * 0.05f + 2000.0f, 0.0f, wz * 0.05f + 2000.0f, 3); // third channel - // Combined noise for material selection (range roughly -1..1) - float matVal = matNoise1 * 0.6f + matNoise2 * 0.4f; - + // Material noise is time-independent (uses y=0.0f, no timeOffset). + // During animation, reuse cached values to skip 8 noise3D calls/column. + const int colIdx = x + z * CHUNK_SIZE; uint8_t surfaceMat; bool surfaceSmooth = false; - if (matVal < -0.30f) { - surfaceMat = 4; // Sand - } else if (matVal < -0.15f) { - surfaceMat = 2; // Dirt (adjacent to sand for sand↔dirt testing) - } else if (matVal < -0.05f) { - surfaceMat = 3; // Stone (blocky, with topings) - } else if (matVal < 0.05f) { - surfaceMat = 6; // SmoothStone (smooth surface) - surfaceSmooth = true; - } else if (matVal < 0.20f) { - surfaceMat = 1; // Grass - } else if (matVal < 0.30f) { - surfaceMat = 4; // Sand (adjacent to grass for sand↔grass testing) - } else if (matNoise3 > 0.1f) { - surfaceMat = 5; // Snow (smooth) - surfaceSmooth = true; + + if (animating) { + // Fast path: read cached material from initial generation + surfaceMat = chunk.cachedSurfaceMat[colIdx]; + surfaceSmooth = (chunk.cachedSurfaceFlags[colIdx] != 0); } else { - surfaceMat = 2; // Dirt + // Full path: compute material noise and cache it + float matNoise1 = fbm(wx * 0.03f + 500.0f, 0.0f, wz * 0.03f + 500.0f, 3); // large patches + float matNoise2 = fbm(wx * 0.08f + 1000.0f, 0.0f, wz * 0.08f + 1000.0f, 2); // medium detail + float matNoise3 = fbm(wx * 0.05f + 2000.0f, 0.0f, wz * 0.05f + 2000.0f, 3); // third channel + float matVal = matNoise1 * 0.6f + matNoise2 * 0.4f; + + if (matVal < -0.30f) { + surfaceMat = 4; // Sand + } else if (matVal < -0.15f) { + surfaceMat = 2; // Dirt + } else if (matVal < -0.05f) { + surfaceMat = 3; // Stone (blocky, with topings) + } else if (matVal < 0.05f) { + surfaceMat = 6; // SmoothStone (smooth surface) + surfaceSmooth = true; + } else if (matVal < 0.20f) { + surfaceMat = 1; // Grass + } else if (matVal < 0.30f) { + surfaceMat = 4; // Sand + } else if (matNoise3 > 0.1f) { + surfaceMat = 5; // Snow (smooth) + surfaceSmooth = true; + } else { + surfaceMat = 2; // Dirt + } + + // Cache for future animation frames + chunk.cachedSurfaceMat[colIdx] = surfaceMat; + chunk.cachedSurfaceFlags[colIdx] = surfaceSmooth ? 1 : 0; } for (int y = 0; y < CHUNK_SIZE; y++) { diff --git a/src/voxel/VoxelWorld.h b/src/voxel/VoxelWorld.h index 5dd3d94..6cebecb 100644 --- a/src/voxel/VoxelWorld.h +++ b/src/voxel/VoxelWorld.h @@ -25,6 +25,11 @@ struct Chunk { bool hasSmooth = false; // true if chunk has smooth mesh output (set by mesher) bool containsSmooth = false; // true if chunk contains any FLAG_SMOOTH voxels (set during generation) + // Cached surface material per column (set during initial generation, reused during animation) + // This avoids recomputing 8 noise3D calls per column that are time-independent. + uint8_t cachedSurfaceMat[CHUNK_SIZE * CHUNK_SIZE] = {}; // material ID per (x,z) column + uint8_t cachedSurfaceFlags[CHUNK_SIZE * CHUNK_SIZE] = {}; // smooth flag per (x,z) column + VoxelData& at(int x, int y, int z) { return voxels[x + y * CHUNK_SIZE + z * CHUNK_SIZE * CHUNK_SIZE]; }