Fixes after improving performance

This commit is contained in:
Samuel Bouchet 2026-03-31 08:53:37 +02:00
parent 0d93cef8f1
commit 53df73e5e6
4 changed files with 96 additions and 55 deletions

View file

@ -50,13 +50,15 @@ VSOutput main(uint vertexID : SV_VertexID, uint instanceID : SV_InstanceID) {
// Quadratic scaling: base stays anchored, tips sway the most.
if (push.materialID != 3u) { // not stone
float localHeight = vtx.position.y - 1.0;
float amplitude = 2.0;
float frequency = 1.4;
if (localHeight > 0.0) {
float heightFactor = localHeight * localHeight; // quadratic
float phase = worldPos.x * 1.8 + worldPos.z * 1.3 + windTime * 3.5;
float phase2 = worldPos.x * 0.7 - worldPos.z * 2.1 + windTime * 2.7;
float swayX = sin(phase) * 0.11 * heightFactor;
float swayZ = cos(phase2) * 0.08 * heightFactor;
float swayY = -abs(sin(phase * 0.7)) * 0.02 * heightFactor; // slight droop
float phase = worldPos.x * 1.8 + worldPos.z * 1.3 + windTime * 3.5 * frequency;
float phase2 = worldPos.x * 0.7 - worldPos.z * 2.1 + windTime * 2.7 * frequency;
float swayX = sin(phase) * 0.11 * heightFactor * amplitude;
float swayZ = cos(phase2) * 0.08 * heightFactor * amplitude;
float swayY = -abs(sin(phase * 0.7)) * 0.02 * heightFactor * amplitude; // slight droop
worldPos.x += swayX;
worldPos.y += swayY;
worldPos.z += swayZ;

View file

@ -2011,6 +2011,7 @@ void VoxelRenderPath::handleInput(float dt) {
// Force full RT rebuild (including topings) when animation stops
renderer.rtDirty_ = true;
renderer.topingBLASDirty_ = true;
renderer.aoHistoryValid_ = false; // clear stale temporal AO from pre-animation
renderer.rtShadowsEnabled_ = rtWasEnabled_;
}
wi::backlog::post(animatedTerrain_ ? "Animation: ON (30 Hz)" : "Animation: OFF");
@ -2239,45 +2240,8 @@ void VoxelRenderPath::Render() const {
renderer.gpuSmoothMeshDirty_ = true;
}
// ── GPU compute toping BLAS extraction ──
// Skip during animation (toping BLAS is skipped to save ~130ms GPU)
if (renderer.topingBLASDirty_ && renderer.topingBLASShader_.IsValid() && !animatedTerrain_) {
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_EXTRACT_BEGIN, cmd);
renderer.dispatchTopingBLASExtract(cmd);
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_EXTRACT_END, cmd);
}
// Phase 6.1: BLAS extraction + acceleration structure build
// During animation, stagger builds to avoid 200ms+ GPU spikes:
// - Skip toping BLAS entirely (7.7M tris = ~130ms, decorative only)
// - Alternate blocky/smooth BLAS builds across animation frames
// When not animating, rebuild all immediately.
{
bool needsBuild = renderer.rtAvailable_ && renderer.blasExtractShader_.IsValid() &&
renderer.gpuMeshQuadCount_ > 0 &&
(renderer.rtDirty_ || renderer.gpuMeshQuadCount_ != renderer.rtBlockyVertexCount_ / 6);
if (needsBuild) {
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_BUILD_BEGIN, cmd);
renderer.dispatchBLASExtract(cmd);
if (animatedTerrain_) {
// Stagger: alternate blocky/smooth each animation frame, skip topings
uint32_t flags = (rtBuildSkipCounter_ & 1)
? VoxelRenderer::RT_BUILD_BLOCKY
: VoxelRenderer::RT_BUILD_SMOOTH;
rtBuildSkipCounter_++;
renderer.buildAccelerationStructures(cmd, flags);
} else {
renderer.buildAccelerationStructures(cmd, VoxelRenderer::RT_BUILD_ALL);
}
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_BUILD_END, cmd);
}
}
}
// ── Deferred GPU uploads (dirty flags set in Update(), need CommandList) ──
// ── Deferred GPU uploads BEFORE compute dispatches that read them ──
// topingInstanceBuffer_ must be filled before dispatchTopingBLASExtract reads it (t5)
if (renderer.topingInstanceDirty_ && renderer.topingInstanceBuffer_.IsValid() &&
!renderer.topingGpuInsts_.empty()) {
size_t uploadSize = renderer.topingGpuInsts_.size() * sizeof(VoxelRenderer::TopingGPUInst);
@ -2300,6 +2264,55 @@ void VoxelRenderPath::Render() const {
renderer.smoothVertexDirty_ = false;
}
// ── GPU compute toping BLAS extraction ──
// Skip during animation (toping BLAS is skipped to save ~130ms GPU)
if (renderer.topingBLASDirty_ && renderer.topingBLASShader_.IsValid() && !animatedTerrain_) {
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_EXTRACT_BEGIN, cmd);
renderer.dispatchTopingBLASExtract(cmd);
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_EXTRACT_END, cmd);
}
// Phase 6.1: BLAS extraction + acceleration structure build
// During animation, stagger builds to avoid 200ms+ GPU spikes:
// - Skip toping BLAS entirely (7.7M tris = ~130ms, decorative only)
// - Alternate blocky/smooth BLAS builds across animation frames
// When not animating, rebuild all immediately.
{
// Detect if new BLAS instances became available since last TLAS creation.
// Without this, the TLAS stays at 1-2 instances and never includes
// late-arriving smooth/toping BLASes (due to 1-frame readback delay).
uint32_t potentialInstances = 0;
if (renderer.gpuMeshQuadCount_ > 0) potentialInstances++;
if (renderer.gpuSmoothVertexCount_ >= 3) potentialInstances++;
if (renderer.rtTopingVertexCount_ >= 3) potentialInstances++;
bool tlasNeedsMoreInstances = potentialInstances > renderer.tlasInstanceCount_;
bool needsBuild = renderer.rtAvailable_ && renderer.blasExtractShader_.IsValid() &&
renderer.gpuMeshQuadCount_ > 0 &&
(renderer.rtDirty_ ||
renderer.gpuMeshQuadCount_ != renderer.rtBlockyVertexCount_ / 6 ||
tlasNeedsMoreInstances);
if (needsBuild) {
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_BUILD_BEGIN, cmd);
renderer.dispatchBLASExtract(cmd);
if (animatedTerrain_) {
// Stagger: alternate blocky/smooth each animation frame, skip topings
uint32_t flags = (rtBuildSkipCounter_ & 1)
? VoxelRenderer::RT_BUILD_BLOCKY
: VoxelRenderer::RT_BUILD_SMOOTH;
rtBuildSkipCounter_++;
renderer.buildAccelerationStructures(cmd, flags);
} else {
renderer.buildAccelerationStructures(cmd, VoxelRenderer::RT_BUILD_ALL);
}
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_BUILD_END, cmd);
}
}
}
// ── Draw passes ──
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_DRAW_BEGIN, cmd);
renderer.render(cmd, *camera, voxelDepth_, voxelRT_, voxelNormalRT_);
@ -2429,6 +2442,15 @@ void VoxelRenderPath::Compose(CommandList cmd) const {
wi::image::Params fx;
fx.enableFullScreen();
fx.blendFlag = wi::enums::BLENDMODE_OPAQUE;
// HDR support: when the swapchain is HDR, Compose() targets a linear float buffer
// (rendertargetPreHDR10). Our voxelRT_ contains tone-mapped sRGB values, so we must
// decode sRGB → linear and apply HDR scaling (same pattern as wiRenderPath2D).
// Without this, the HDR10 PQ curve double-transforms our already-gamma'd values.
if (colorspace != wi::graphics::ColorSpace::SRGB) {
fx.enableLinearOutputMapping(hdr_scaling);
}
wi::image::Draw(&voxelRT_, fx, cmd);
}

View file

@ -115,7 +115,7 @@ void VoxelWorld::generateChunk(Chunk& chunk, float timeOffset) {
const float caveScale = 0.05f;
const float caveThreshold = 0.3f;
// Animation mode: fewer octaves + skip caves (much faster for 20Hz regen)
// Animation mode: fewer octaves + skip caves + cached materials (much faster for 30Hz regen)
const bool animating = (timeOffset != 0.0f);
const int heightOctaves = animating ? 2 : 5;
@ -130,20 +130,27 @@ void VoxelWorld::generateChunk(Chunk& chunk, float timeOffset) {
float height = baseHeight + heightScale * fbm(wx * scale, timeOffset, wz * scale, heightOctaves);
// ── Surface material via noise-based patches ──
// Use 2D noise at different frequencies/seeds to create organic patches
// of each material on the surface, instead of altitude bands.
// Material noise is time-independent (uses y=0.0f, no timeOffset).
// During animation, reuse cached values to skip 8 noise3D calls/column.
const int colIdx = x + z * CHUNK_SIZE;
uint8_t surfaceMat;
bool surfaceSmooth = false;
if (animating) {
// Fast path: read cached material from initial generation
surfaceMat = chunk.cachedSurfaceMat[colIdx];
surfaceSmooth = (chunk.cachedSurfaceFlags[colIdx] != 0);
} else {
// Full path: compute material noise and cache it
float matNoise1 = fbm(wx * 0.03f + 500.0f, 0.0f, wz * 0.03f + 500.0f, 3); // large patches
float matNoise2 = fbm(wx * 0.08f + 1000.0f, 0.0f, wz * 0.08f + 1000.0f, 2); // medium detail
float matNoise3 = fbm(wx * 0.05f + 2000.0f, 0.0f, wz * 0.05f + 2000.0f, 3); // third channel
// Combined noise for material selection (range roughly -1..1)
float matVal = matNoise1 * 0.6f + matNoise2 * 0.4f;
uint8_t surfaceMat;
bool surfaceSmooth = false;
if (matVal < -0.30f) {
surfaceMat = 4; // Sand
} else if (matVal < -0.15f) {
surfaceMat = 2; // Dirt (adjacent to sand for sand↔dirt testing)
surfaceMat = 2; // Dirt
} else if (matVal < -0.05f) {
surfaceMat = 3; // Stone (blocky, with topings)
} else if (matVal < 0.05f) {
@ -152,7 +159,7 @@ void VoxelWorld::generateChunk(Chunk& chunk, float timeOffset) {
} else if (matVal < 0.20f) {
surfaceMat = 1; // Grass
} else if (matVal < 0.30f) {
surfaceMat = 4; // Sand (adjacent to grass for sand↔grass testing)
surfaceMat = 4; // Sand
} else if (matNoise3 > 0.1f) {
surfaceMat = 5; // Snow (smooth)
surfaceSmooth = true;
@ -160,6 +167,11 @@ void VoxelWorld::generateChunk(Chunk& chunk, float timeOffset) {
surfaceMat = 2; // Dirt
}
// Cache for future animation frames
chunk.cachedSurfaceMat[colIdx] = surfaceMat;
chunk.cachedSurfaceFlags[colIdx] = surfaceSmooth ? 1 : 0;
}
for (int y = 0; y < CHUNK_SIZE; y++) {
float wy = (float)(chunk.pos.y * CHUNK_SIZE + y);
VoxelData v;

View file

@ -25,6 +25,11 @@ struct Chunk {
bool hasSmooth = false; // true if chunk has smooth mesh output (set by mesher)
bool containsSmooth = false; // true if chunk contains any FLAG_SMOOTH voxels (set during generation)
// Cached surface material per column (set during initial generation, reused during animation).
// This avoids recomputing the 8 time-independent noise3D calls per column while animating.
uint8_t cachedSurfaceMat[CHUNK_SIZE * CHUNK_SIZE] = {}; // material ID per (x,z) column
uint8_t cachedSurfaceFlags[CHUNK_SIZE * CHUNK_SIZE] = {}; // smooth flag per (x,z) column
// Returns a mutable reference to the voxel stored at local chunk coordinates (x, y, z).
// The linear index is x + y * CHUNK_SIZE + z * CHUNK_SIZE^2, so x varies fastest,
// then y, then z. This accessor does no range checking of its own.
VoxelData& at(int x, int y, int z) {
    const auto rowOffset = y * CHUNK_SIZE;                 // stride of one step along y
    const auto sliceOffset = z * CHUNK_SIZE * CHUNK_SIZE;  // stride of one step along z
    return voxels[x + rowOffset + sliceOffset];
}