Fixes after improving performance
This commit is contained in:
parent
0d93cef8f1
commit
53df73e5e6
4 changed files with 96 additions and 55 deletions
|
|
@ -50,13 +50,15 @@ VSOutput main(uint vertexID : SV_VertexID, uint instanceID : SV_InstanceID) {
|
||||||
// Quadratic scaling: base stays anchored, tips sway the most.
|
// Quadratic scaling: base stays anchored, tips sway the most.
|
||||||
if (push.materialID != 3u) { // not stone
|
if (push.materialID != 3u) { // not stone
|
||||||
float localHeight = vtx.position.y - 1.0;
|
float localHeight = vtx.position.y - 1.0;
|
||||||
|
float amplitude = 2.0;
|
||||||
|
float frequency = 1.4;
|
||||||
if (localHeight > 0.0) {
|
if (localHeight > 0.0) {
|
||||||
float heightFactor = localHeight * localHeight; // quadratic
|
float heightFactor = localHeight * localHeight; // quadratic
|
||||||
float phase = worldPos.x * 1.8 + worldPos.z * 1.3 + windTime * 3.5;
|
float phase = worldPos.x * 1.8 + worldPos.z * 1.3 + windTime * 3.5 * frequency;
|
||||||
float phase2 = worldPos.x * 0.7 - worldPos.z * 2.1 + windTime * 2.7;
|
float phase2 = worldPos.x * 0.7 - worldPos.z * 2.1 + windTime * 2.7 * frequency;
|
||||||
float swayX = sin(phase) * 0.11 * heightFactor;
|
float swayX = sin(phase) * 0.11 * heightFactor * amplitude;
|
||||||
float swayZ = cos(phase2) * 0.08 * heightFactor;
|
float swayZ = cos(phase2) * 0.08 * heightFactor * amplitude;
|
||||||
float swayY = -abs(sin(phase * 0.7)) * 0.02 * heightFactor; // slight droop
|
float swayY = -abs(sin(phase * 0.7)) * 0.02 * heightFactor * amplitude; // slight droop
|
||||||
worldPos.x += swayX;
|
worldPos.x += swayX;
|
||||||
worldPos.y += swayY;
|
worldPos.y += swayY;
|
||||||
worldPos.z += swayZ;
|
worldPos.z += swayZ;
|
||||||
|
|
|
||||||
|
|
@ -2011,6 +2011,7 @@ void VoxelRenderPath::handleInput(float dt) {
|
||||||
// Force full RT rebuild (including topings) when animation stops
|
// Force full RT rebuild (including topings) when animation stops
|
||||||
renderer.rtDirty_ = true;
|
renderer.rtDirty_ = true;
|
||||||
renderer.topingBLASDirty_ = true;
|
renderer.topingBLASDirty_ = true;
|
||||||
|
renderer.aoHistoryValid_ = false; // clear stale temporal AO from pre-animation
|
||||||
renderer.rtShadowsEnabled_ = rtWasEnabled_;
|
renderer.rtShadowsEnabled_ = rtWasEnabled_;
|
||||||
}
|
}
|
||||||
wi::backlog::post(animatedTerrain_ ? "Animation: ON (30 Hz)" : "Animation: OFF");
|
wi::backlog::post(animatedTerrain_ ? "Animation: ON (30 Hz)" : "Animation: OFF");
|
||||||
|
|
@ -2239,6 +2240,30 @@ void VoxelRenderPath::Render() const {
|
||||||
renderer.gpuSmoothMeshDirty_ = true;
|
renderer.gpuSmoothMeshDirty_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Deferred GPU uploads BEFORE compute dispatches that read them ──
|
||||||
|
// topingInstanceBuffer_ must be filled before dispatchTopingBLASExtract reads it (t5)
|
||||||
|
if (renderer.topingInstanceDirty_ && renderer.topingInstanceBuffer_.IsValid() &&
|
||||||
|
!renderer.topingGpuInsts_.empty()) {
|
||||||
|
size_t uploadSize = renderer.topingGpuInsts_.size() * sizeof(VoxelRenderer::TopingGPUInst);
|
||||||
|
size_t bufferSize = renderer.topingInstanceCapacity_ * sizeof(VoxelRenderer::TopingGPUInst);
|
||||||
|
if (uploadSize <= bufferSize) {
|
||||||
|
device->UpdateBuffer(&renderer.topingInstanceBuffer_,
|
||||||
|
renderer.topingGpuInsts_.data(), cmd, uploadSize);
|
||||||
|
}
|
||||||
|
renderer.topingInstanceDirty_ = false;
|
||||||
|
}
|
||||||
|
if (renderer.smoothVertexDirty_ && renderer.smoothVertexBuffer_.IsValid() &&
|
||||||
|
renderer.smoothVertexCount_ > 0 &&
|
||||||
|
renderer.smoothVertexCount_ <= renderer.smoothStagingVerts_.size()) {
|
||||||
|
size_t uploadSize = renderer.smoothVertexCount_ * sizeof(SmoothVertex);
|
||||||
|
size_t bufferSize = renderer.smoothVertexCapacity_ * sizeof(SmoothVertex);
|
||||||
|
if (uploadSize <= bufferSize) {
|
||||||
|
device->UpdateBuffer(&renderer.smoothVertexBuffer_,
|
||||||
|
renderer.smoothStagingVerts_.data(), cmd, uploadSize);
|
||||||
|
}
|
||||||
|
renderer.smoothVertexDirty_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
// ── GPU compute toping BLAS extraction ──
|
// ── GPU compute toping BLAS extraction ──
|
||||||
// Skip during animation (toping BLAS is skipped to save ~130ms GPU)
|
// Skip during animation (toping BLAS is skipped to save ~130ms GPU)
|
||||||
if (renderer.topingBLASDirty_ && renderer.topingBLASShader_.IsValid() && !animatedTerrain_) {
|
if (renderer.topingBLASDirty_ && renderer.topingBLASShader_.IsValid() && !animatedTerrain_) {
|
||||||
|
|
@ -2253,9 +2278,20 @@ void VoxelRenderPath::Render() const {
|
||||||
// - Alternate blocky/smooth BLAS builds across animation frames
|
// - Alternate blocky/smooth BLAS builds across animation frames
|
||||||
// When not animating, rebuild all immediately.
|
// When not animating, rebuild all immediately.
|
||||||
{
|
{
|
||||||
|
// Detect if new BLAS instances became available since last TLAS creation.
|
||||||
|
// Without this, the TLAS stays at 1-2 instances and never includes
|
||||||
|
// late-arriving smooth/toping BLASes (due to 1-frame readback delay).
|
||||||
|
uint32_t potentialInstances = 0;
|
||||||
|
if (renderer.gpuMeshQuadCount_ > 0) potentialInstances++;
|
||||||
|
if (renderer.gpuSmoothVertexCount_ >= 3) potentialInstances++;
|
||||||
|
if (renderer.rtTopingVertexCount_ >= 3) potentialInstances++;
|
||||||
|
bool tlasNeedsMoreInstances = potentialInstances > renderer.tlasInstanceCount_;
|
||||||
|
|
||||||
bool needsBuild = renderer.rtAvailable_ && renderer.blasExtractShader_.IsValid() &&
|
bool needsBuild = renderer.rtAvailable_ && renderer.blasExtractShader_.IsValid() &&
|
||||||
renderer.gpuMeshQuadCount_ > 0 &&
|
renderer.gpuMeshQuadCount_ > 0 &&
|
||||||
(renderer.rtDirty_ || renderer.gpuMeshQuadCount_ != renderer.rtBlockyVertexCount_ / 6);
|
(renderer.rtDirty_ ||
|
||||||
|
renderer.gpuMeshQuadCount_ != renderer.rtBlockyVertexCount_ / 6 ||
|
||||||
|
tlasNeedsMoreInstances);
|
||||||
|
|
||||||
if (needsBuild) {
|
if (needsBuild) {
|
||||||
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_BUILD_BEGIN, cmd);
|
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_BLAS_BUILD_BEGIN, cmd);
|
||||||
|
|
@ -2277,29 +2313,6 @@ void VoxelRenderPath::Render() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Deferred GPU uploads (dirty flags set in Update(), need CommandList) ──
|
|
||||||
if (renderer.topingInstanceDirty_ && renderer.topingInstanceBuffer_.IsValid() &&
|
|
||||||
!renderer.topingGpuInsts_.empty()) {
|
|
||||||
size_t uploadSize = renderer.topingGpuInsts_.size() * sizeof(VoxelRenderer::TopingGPUInst);
|
|
||||||
size_t bufferSize = renderer.topingInstanceCapacity_ * sizeof(VoxelRenderer::TopingGPUInst);
|
|
||||||
if (uploadSize <= bufferSize) {
|
|
||||||
device->UpdateBuffer(&renderer.topingInstanceBuffer_,
|
|
||||||
renderer.topingGpuInsts_.data(), cmd, uploadSize);
|
|
||||||
}
|
|
||||||
renderer.topingInstanceDirty_ = false;
|
|
||||||
}
|
|
||||||
if (renderer.smoothVertexDirty_ && renderer.smoothVertexBuffer_.IsValid() &&
|
|
||||||
renderer.smoothVertexCount_ > 0 &&
|
|
||||||
renderer.smoothVertexCount_ <= renderer.smoothStagingVerts_.size()) {
|
|
||||||
size_t uploadSize = renderer.smoothVertexCount_ * sizeof(SmoothVertex);
|
|
||||||
size_t bufferSize = renderer.smoothVertexCapacity_ * sizeof(SmoothVertex);
|
|
||||||
if (uploadSize <= bufferSize) {
|
|
||||||
device->UpdateBuffer(&renderer.smoothVertexBuffer_,
|
|
||||||
renderer.smoothStagingVerts_.data(), cmd, uploadSize);
|
|
||||||
}
|
|
||||||
renderer.smoothVertexDirty_ = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ── Draw passes ──
|
// ── Draw passes ──
|
||||||
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_DRAW_BEGIN, cmd);
|
device->QueryEnd(&renderer.timestampHeap_, VoxelRenderer::TS_DRAW_BEGIN, cmd);
|
||||||
renderer.render(cmd, *camera, voxelDepth_, voxelRT_, voxelNormalRT_);
|
renderer.render(cmd, *camera, voxelDepth_, voxelRT_, voxelNormalRT_);
|
||||||
|
|
@ -2429,6 +2442,15 @@ void VoxelRenderPath::Compose(CommandList cmd) const {
|
||||||
wi::image::Params fx;
|
wi::image::Params fx;
|
||||||
fx.enableFullScreen();
|
fx.enableFullScreen();
|
||||||
fx.blendFlag = wi::enums::BLENDMODE_OPAQUE;
|
fx.blendFlag = wi::enums::BLENDMODE_OPAQUE;
|
||||||
|
|
||||||
|
// HDR support: when the swapchain is HDR, Compose() targets a linear float buffer
|
||||||
|
// (rendertargetPreHDR10). Our voxelRT_ contains tone-mapped sRGB values, so we must
|
||||||
|
// decode sRGB → linear and apply HDR scaling (same pattern as wiRenderPath2D).
|
||||||
|
// Without this, the HDR10 PQ curve double-transforms our already-gamma'd values.
|
||||||
|
if (colorspace != wi::graphics::ColorSpace::SRGB) {
|
||||||
|
fx.enableLinearOutputMapping(hdr_scaling);
|
||||||
|
}
|
||||||
|
|
||||||
wi::image::Draw(&voxelRT_, fx, cmd);
|
wi::image::Draw(&voxelRT_, fx, cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -115,7 +115,7 @@ void VoxelWorld::generateChunk(Chunk& chunk, float timeOffset) {
|
||||||
const float caveScale = 0.05f;
|
const float caveScale = 0.05f;
|
||||||
const float caveThreshold = 0.3f;
|
const float caveThreshold = 0.3f;
|
||||||
|
|
||||||
// Animation mode: fewer octaves + skip caves (much faster for 20Hz regen)
|
// Animation mode: fewer octaves + skip caves + cached materials (much faster for 30Hz regen)
|
||||||
const bool animating = (timeOffset != 0.0f);
|
const bool animating = (timeOffset != 0.0f);
|
||||||
const int heightOctaves = animating ? 2 : 5;
|
const int heightOctaves = animating ? 2 : 5;
|
||||||
|
|
||||||
|
|
@ -130,34 +130,46 @@ void VoxelWorld::generateChunk(Chunk& chunk, float timeOffset) {
|
||||||
float height = baseHeight + heightScale * fbm(wx * scale, timeOffset, wz * scale, heightOctaves);
|
float height = baseHeight + heightScale * fbm(wx * scale, timeOffset, wz * scale, heightOctaves);
|
||||||
|
|
||||||
// ── Surface material via noise-based patches ──
|
// ── Surface material via noise-based patches ──
|
||||||
// Use 2D noise at different frequencies/seeds to create organic patches
|
// Material noise is time-independent (uses y=0.0f, no timeOffset).
|
||||||
// of each material on the surface, instead of altitude bands.
|
// During animation, reuse cached values to skip 8 noise3D calls/column.
|
||||||
float matNoise1 = fbm(wx * 0.03f + 500.0f, 0.0f, wz * 0.03f + 500.0f, 3); // large patches
|
const int colIdx = x + z * CHUNK_SIZE;
|
||||||
float matNoise2 = fbm(wx * 0.08f + 1000.0f, 0.0f, wz * 0.08f + 1000.0f, 2); // medium detail
|
|
||||||
float matNoise3 = fbm(wx * 0.05f + 2000.0f, 0.0f, wz * 0.05f + 2000.0f, 3); // third channel
|
|
||||||
// Combined noise for material selection (range roughly -1..1)
|
|
||||||
float matVal = matNoise1 * 0.6f + matNoise2 * 0.4f;
|
|
||||||
|
|
||||||
uint8_t surfaceMat;
|
uint8_t surfaceMat;
|
||||||
bool surfaceSmooth = false;
|
bool surfaceSmooth = false;
|
||||||
if (matVal < -0.30f) {
|
|
||||||
surfaceMat = 4; // Sand
|
if (animating) {
|
||||||
} else if (matVal < -0.15f) {
|
// Fast path: read cached material from initial generation
|
||||||
surfaceMat = 2; // Dirt (adjacent to sand for sand↔dirt testing)
|
surfaceMat = chunk.cachedSurfaceMat[colIdx];
|
||||||
} else if (matVal < -0.05f) {
|
surfaceSmooth = (chunk.cachedSurfaceFlags[colIdx] != 0);
|
||||||
surfaceMat = 3; // Stone (blocky, with topings)
|
|
||||||
} else if (matVal < 0.05f) {
|
|
||||||
surfaceMat = 6; // SmoothStone (smooth surface)
|
|
||||||
surfaceSmooth = true;
|
|
||||||
} else if (matVal < 0.20f) {
|
|
||||||
surfaceMat = 1; // Grass
|
|
||||||
} else if (matVal < 0.30f) {
|
|
||||||
surfaceMat = 4; // Sand (adjacent to grass for sand↔grass testing)
|
|
||||||
} else if (matNoise3 > 0.1f) {
|
|
||||||
surfaceMat = 5; // Snow (smooth)
|
|
||||||
surfaceSmooth = true;
|
|
||||||
} else {
|
} else {
|
||||||
surfaceMat = 2; // Dirt
|
// Full path: compute material noise and cache it
|
||||||
|
float matNoise1 = fbm(wx * 0.03f + 500.0f, 0.0f, wz * 0.03f + 500.0f, 3); // large patches
|
||||||
|
float matNoise2 = fbm(wx * 0.08f + 1000.0f, 0.0f, wz * 0.08f + 1000.0f, 2); // medium detail
|
||||||
|
float matNoise3 = fbm(wx * 0.05f + 2000.0f, 0.0f, wz * 0.05f + 2000.0f, 3); // third channel
|
||||||
|
float matVal = matNoise1 * 0.6f + matNoise2 * 0.4f;
|
||||||
|
|
||||||
|
if (matVal < -0.30f) {
|
||||||
|
surfaceMat = 4; // Sand
|
||||||
|
} else if (matVal < -0.15f) {
|
||||||
|
surfaceMat = 2; // Dirt
|
||||||
|
} else if (matVal < -0.05f) {
|
||||||
|
surfaceMat = 3; // Stone (blocky, with topings)
|
||||||
|
} else if (matVal < 0.05f) {
|
||||||
|
surfaceMat = 6; // SmoothStone (smooth surface)
|
||||||
|
surfaceSmooth = true;
|
||||||
|
} else if (matVal < 0.20f) {
|
||||||
|
surfaceMat = 1; // Grass
|
||||||
|
} else if (matVal < 0.30f) {
|
||||||
|
surfaceMat = 4; // Sand
|
||||||
|
} else if (matNoise3 > 0.1f) {
|
||||||
|
surfaceMat = 5; // Snow (smooth)
|
||||||
|
surfaceSmooth = true;
|
||||||
|
} else {
|
||||||
|
surfaceMat = 2; // Dirt
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache for future animation frames
|
||||||
|
chunk.cachedSurfaceMat[colIdx] = surfaceMat;
|
||||||
|
chunk.cachedSurfaceFlags[colIdx] = surfaceSmooth ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int y = 0; y < CHUNK_SIZE; y++) {
|
for (int y = 0; y < CHUNK_SIZE; y++) {
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,11 @@ struct Chunk {
|
||||||
bool hasSmooth = false; // true if chunk has smooth mesh output (set by mesher)
|
bool hasSmooth = false; // true if chunk has smooth mesh output (set by mesher)
|
||||||
bool containsSmooth = false; // true if chunk contains any FLAG_SMOOTH voxels (set during generation)
|
bool containsSmooth = false; // true if chunk contains any FLAG_SMOOTH voxels (set during generation)
|
||||||
|
|
||||||
|
// Cached surface material per column (set during initial generation, reused during animation)
|
||||||
|
// This avoids recomputing 8 noise3D calls per column that are time-independent.
|
||||||
|
uint8_t cachedSurfaceMat[CHUNK_SIZE * CHUNK_SIZE] = {}; // material ID per (x,z) column
|
||||||
|
uint8_t cachedSurfaceFlags[CHUNK_SIZE * CHUNK_SIZE] = {}; // smooth flag per (x,z) column
|
||||||
|
|
||||||
VoxelData& at(int x, int y, int z) {
|
VoxelData& at(int x, int y, int z) {
|
||||||
return voxels[x + y * CHUNK_SIZE + z * CHUNK_SIZE * CHUNK_SIZE];
|
return voxels[x + y * CHUNK_SIZE + z * CHUNK_SIZE * CHUNK_SIZE];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue