2026-03-25 14:24:05 +01:00
|
|
|
#include "VoxelRenderer.h"
|
2026-03-26 09:05:52 +01:00
|
|
|
#include "wiJobSystem.h"
|
2026-03-25 14:24:05 +01:00
|
|
|
#include "wiPrimitive.h"
|
|
|
|
|
#include <algorithm>
|
2026-03-25 22:51:22 +01:00
|
|
|
#include <chrono>
|
2026-03-25 14:24:05 +01:00
|
|
|
#include <cmath>
|
2026-03-26 09:05:52 +01:00
|
|
|
#include <cstring>
|
2026-03-25 14:24:05 +01:00
|
|
|
|
|
|
|
|
using namespace wi::graphics;
|
|
|
|
|
|
|
|
|
|
namespace voxel {
|
|
|
|
|
|
|
|
|
|
// ── VoxelRenderer Implementation ────────────────────────────────
|
|
|
|
|
|
|
|
|
|
// Defaulted constructor: no GPU work happens here; resources are created by initialize().
VoxelRenderer::VoxelRenderer() = default;
|
|
|
|
|
// Destructor: delegates to shutdown() so the CPU-side chunk bookkeeping is cleared.
VoxelRenderer::~VoxelRenderer() { shutdown(); }
|
|
|
|
|
|
|
|
|
|
// Binds the renderer to `dev` and creates every GPU resource it owns: the
// pipelines, the material texture array, the mega quad / chunk info / indirect
// draw buffers, the timestamp query heap and, when the mesh compute shader
// loads, the GPU mesher buffers.
//
// Returns early without touching initialized_ when `dev` is null. When the main
// pipeline cannot be created, an error is logged and initialized_ is set to false.
void VoxelRenderer::initialize(GraphicsDevice* dev) {
    device_ = dev;
    if (!device_) return;

    createPipeline();
    if (!pso_.IsValid()) {
        wi::backlog::post("VoxelRenderer: pipeline creation failed", wi::backlog::LogLevel::Error);
        initialized_ = false;
        return;
    }
    generateTextures();

    // Create mega quad buffer (SRV for vertex pulling)
    GPUBufferDesc megaDesc;
    megaDesc.size = MEGA_BUFFER_CAPACITY * sizeof(PackedQuad);
    megaDesc.bind_flags = BindFlag::SHADER_RESOURCE;
    megaDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
    megaDesc.stride = sizeof(PackedQuad);
    megaDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&megaDesc, nullptr, &megaQuadBuffer_);

    // Create chunk info buffer (SRV for VS chunk lookup)
    GPUBufferDesc infoDesc;
    infoDesc.size = MAX_CHUNKS * sizeof(GPUChunkInfo);
    infoDesc.bind_flags = BindFlag::SHADER_RESOURCE;
    infoDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
    infoDesc.stride = sizeof(GPUChunkInfo);
    infoDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&infoDesc, nullptr, &chunkInfoBuffer_);

    // Create indirect args buffer (for DrawInstancedIndirectCount, up to 6 draws per chunk)
    // UAV bind flag needed for GPU cull compute shader to write args
    GPUBufferDesc argsDesc;
    argsDesc.size = MAX_DRAWS * sizeof(IndirectDrawArgs);
    argsDesc.bind_flags = BindFlag::UNORDERED_ACCESS;
    argsDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED | ResourceMiscFlag::INDIRECT_ARGS;
    argsDesc.stride = sizeof(IndirectDrawArgs);
    argsDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&argsDesc, nullptr, &indirectArgsBuffer_);

    // Create draw count buffer (single uint32, raw for RWByteAddressBuffer)
    // UAV bind flag needed for GPU cull compute shader atomic counter
    GPUBufferDesc countDesc;
    countDesc.size = sizeof(uint32_t);
    countDesc.bind_flags = BindFlag::UNORDERED_ACCESS;
    countDesc.misc_flags = ResourceMiscFlag::BUFFER_RAW | ResourceMiscFlag::INDIRECT_ARGS;
    countDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&countDesc, nullptr, &drawCountBuffer_);

    // ── GPU Timestamp Queries ──────────────────────────────────────
    GPUQueryHeapDesc queryDesc;
    queryDesc.type = GpuQueryType::TIMESTAMP;
    queryDesc.query_count = TS_COUNT;
    device_->CreateQueryHeap(&queryDesc, &timestampHeap_);

    // CPU-readable destination for the resolved timestamps (one uint64 per query).
    GPUBufferDesc readbackDesc;
    readbackDesc.size = TS_COUNT * sizeof(uint64_t);
    readbackDesc.usage = Usage::READBACK;
    device_->CreateBuffer(&readbackDesc, nullptr, &timestampReadback_);

    // ── GPU Compute Mesher resources ─────────────────────────────
    wi::renderer::LoadShader(ShaderStage::CS, meshShader_, "voxel/voxelMeshCS.cso");
    gpuMesherAvailable_ = meshShader_.IsValid();
    if (gpuMesherAvailable_) {
        // Voxel data buffer: 1 chunk's worth (32^3 voxels / 2 per uint = 16384 uint)
        GPUBufferDesc voxDesc;
        voxDesc.size = (CHUNK_VOLUME / 2) * sizeof(uint32_t);
        voxDesc.bind_flags = BindFlag::SHADER_RESOURCE;
        voxDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
        voxDesc.stride = sizeof(uint32_t);
        voxDesc.usage = Usage::DEFAULT;
        device_->CreateBuffer(&voxDesc, nullptr, &voxelDataBuffer_);
        // Keep the capacity (in uint32 words) in sync with the buffer just created.
        // dispatchGpuMesh only grows the buffer when totalWords exceeds this value,
        // so a stale larger value left over from an earlier initialize/run would
        // let it upload more data than this single-chunk buffer can hold.
        voxelDataCapacity_ = CHUNK_VOLUME / 2;

        // GPU quad output: same capacity as mega-buffer
        GPUBufferDesc gpuQDesc;
        gpuQDesc.size = MEGA_BUFFER_CAPACITY * sizeof(uint64_t); // PackedQuad = 8 bytes
        gpuQDesc.bind_flags = BindFlag::UNORDERED_ACCESS | BindFlag::SHADER_RESOURCE;
        gpuQDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
        gpuQDesc.stride = sizeof(uint64_t); // uint2 = 8 bytes
        gpuQDesc.usage = Usage::DEFAULT;
        device_->CreateBuffer(&gpuQDesc, nullptr, &gpuQuadBuffer_);

        // Quad counter
        GPUBufferDesc cntDesc;
        cntDesc.size = sizeof(uint32_t);
        cntDesc.bind_flags = BindFlag::UNORDERED_ACCESS;
        cntDesc.misc_flags = ResourceMiscFlag::BUFFER_RAW;
        cntDesc.usage = Usage::DEFAULT;
        device_->CreateBuffer(&cntDesc, nullptr, &gpuQuadCounter_);

        // Readback buffer for quad counter (GPU → CPU)
        GPUBufferDesc rbDesc;
        rbDesc.size = sizeof(uint32_t);
        rbDesc.usage = Usage::READBACK;
        device_->CreateBuffer(&rbDesc, nullptr, &meshCounterReadback_);

        wi::backlog::post("VoxelRenderer: GPU compute mesher available");
    } else {
        wi::backlog::post("VoxelRenderer: GPU compute mesher not available", wi::backlog::LogLevel::Warning);
    }

    // Pre-size the CPU-side staging containers so per-frame rebuilds do not reallocate.
    cpuMegaQuads_.reserve(MEGA_BUFFER_CAPACITY);
    cpuChunkInfo_.reserve(MAX_CHUNKS);
    chunkSlots_.reserve(MAX_CHUNKS);
    cpuIndirectArgs_.reserve(MAX_CHUNKS);

    initialized_ = true;
    wi::backlog::post("VoxelRenderer: initialized (mega-buffer: "
        + std::to_string(MEGA_BUFFER_CAPACITY) + " quads capacity)");
}
|
|
|
|
|
|
|
|
|
|
// Marks the renderer as uninitialized and empties the CPU-side chunk slot,
// chunk info and quad staging containers.
void VoxelRenderer::shutdown() {
    initialized_ = false;

    cpuMegaQuads_.clear();
    cpuChunkInfo_.clear();
    chunkSlots_.clear();
}
|
|
|
|
|
|
|
|
|
|
void VoxelRenderer::createPipeline() {
|
|
|
|
|
// Constant buffer for per-frame data
|
|
|
|
|
GPUBufferDesc cbDesc;
|
|
|
|
|
cbDesc.size = sizeof(VoxelConstants);
|
|
|
|
|
cbDesc.bind_flags = BindFlag::CONSTANT_BUFFER;
|
|
|
|
|
cbDesc.usage = Usage::DEFAULT;
|
|
|
|
|
device_->CreateBuffer(&cbDesc, nullptr, &constantBuffer_);
|
|
|
|
|
|
|
|
|
|
// Anisotropic wrap sampler
|
|
|
|
|
SamplerDesc samplerDesc;
|
|
|
|
|
samplerDesc.filter = Filter::ANISOTROPIC;
|
|
|
|
|
samplerDesc.address_u = TextureAddressMode::WRAP;
|
|
|
|
|
samplerDesc.address_v = TextureAddressMode::WRAP;
|
|
|
|
|
samplerDesc.address_w = TextureAddressMode::WRAP;
|
|
|
|
|
samplerDesc.max_anisotropy = 16;
|
|
|
|
|
device_->CreateSampler(&samplerDesc, &sampler_);
|
|
|
|
|
|
|
|
|
|
// Load shaders
|
|
|
|
|
wi::renderer::LoadShader(ShaderStage::VS, vertexShader_, "voxel/voxelVS.cso");
|
|
|
|
|
wi::renderer::LoadShader(ShaderStage::PS, pixelShader_, "voxel/voxelPS.cso");
|
|
|
|
|
wi::renderer::LoadShader(ShaderStage::CS, cullShader_, "voxel/voxelCullCS.cso");
|
|
|
|
|
|
|
|
|
|
if (!vertexShader_.IsValid() || !pixelShader_.IsValid()) {
|
|
|
|
|
wi::backlog::post("VoxelRenderer: shader loading failed", wi::backlog::LogLevel::Error);
|
|
|
|
|
return;
|
|
|
|
|
}
|
2026-03-25 14:50:55 +01:00
|
|
|
if (cullShader_.IsValid()) {
|
2026-03-25 22:30:50 +01:00
|
|
|
gpuCullingEnabled_ = true;
|
|
|
|
|
wi::backlog::post("VoxelRenderer: GPU cull compute shader enabled");
|
2026-03-25 14:24:05 +01:00
|
|
|
} else {
|
2026-03-25 22:30:50 +01:00
|
|
|
gpuCullingEnabled_ = false;
|
|
|
|
|
wi::backlog::post("VoxelRenderer: cull compute shader not available, using CPU fallback", wi::backlog::LogLevel::Warning);
|
2026-03-25 14:24:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Pipeline: backface cull, depth test, opaque blend, triangle list
|
|
|
|
|
PipelineStateDesc psoDesc;
|
|
|
|
|
psoDesc.vs = &vertexShader_;
|
|
|
|
|
psoDesc.ps = &pixelShader_;
|
|
|
|
|
psoDesc.rs = wi::renderer::GetRasterizerState(wi::enums::RSTYPE_FRONT);
|
|
|
|
|
psoDesc.dss = wi::renderer::GetDepthStencilState(wi::enums::DSSTYPE_DEFAULT);
|
|
|
|
|
psoDesc.bs = wi::renderer::GetBlendState(wi::enums::BSTYPE_OPAQUE);
|
|
|
|
|
psoDesc.pt = PrimitiveTopology::TRIANGLELIST;
|
|
|
|
|
|
|
|
|
|
device_->CreatePipelineState(&psoDesc, &pso_);
|
2026-03-26 17:47:08 +01:00
|
|
|
|
|
|
|
|
// ── Toping pipeline (Phase 4) ────────────────────────────────
|
|
|
|
|
wi::renderer::LoadShader(ShaderStage::VS, topingVS_, "voxel/voxelTopingVS.cso");
|
|
|
|
|
wi::renderer::LoadShader(ShaderStage::PS, topingPS_, "voxel/voxelTopingPS.cso");
|
|
|
|
|
|
|
|
|
|
if (topingVS_.IsValid() && topingPS_.IsValid()) {
|
|
|
|
|
PipelineStateDesc topingPsoDesc;
|
|
|
|
|
topingPsoDesc.vs = &topingVS_;
|
|
|
|
|
topingPsoDesc.ps = &topingPS_;
|
|
|
|
|
topingPsoDesc.rs = wi::renderer::GetRasterizerState(wi::enums::RSTYPE_FRONT);
|
|
|
|
|
topingPsoDesc.dss = wi::renderer::GetDepthStencilState(wi::enums::DSSTYPE_DEFAULT);
|
|
|
|
|
topingPsoDesc.bs = wi::renderer::GetBlendState(wi::enums::BSTYPE_OPAQUE);
|
|
|
|
|
topingPsoDesc.pt = PrimitiveTopology::TRIANGLELIST;
|
|
|
|
|
device_->CreatePipelineState(&topingPsoDesc, &topingPso_);
|
|
|
|
|
wi::backlog::post("VoxelRenderer: toping pipeline created");
|
|
|
|
|
} else {
|
|
|
|
|
wi::backlog::post("VoxelRenderer: toping shader loading failed", wi::backlog::LogLevel::Warning);
|
|
|
|
|
}
|
Phase 5.1: Naive Surface Nets smooth rendering
Implement CPU-side Naive Surface Nets for smooth voxel surfaces (SmoothStone,
Snow) coexisting with blocky voxels (Grass, Dirt, Stone, Sand).
Key features:
- SmoothMesher with binary SDF, centroid vertex placement, per-axis boundary
clamping to align with blocky grid at smooth↔blocky transitions
- Cross-chunk connectivity: PAD=2 SDF grid, vertex range [-1, CHUNK_SIZE),
canonical edge ownership (no duplicate triangles, no z-fighting)
- Face normals oriented by edge axis+sign (robust with binary SDF, unlike
SDF gradient dot or centroid sampling approaches)
- Y-axis winding fix: sharing cells have different spatial arrangement,
requiring opposite winding from X and Z axes
- GPU mesher treats smooth neighbors as solid (no blocky faces toward smooth)
- Material blending: primary (smooth-only) + secondary (all counts) per vertex
- Dedicated shaders: voxelSmoothVS (vertex pulling t6) + voxelSmoothPS
(triplanar + lerp blending between two materials)
- Separate render pass with LoadOp::LOAD after voxels+topings
- New materials: SmoothStone (mat 6), blocky Stone (mat 3) and Dirt patches
added to world generation for boundary testing
2026-03-27 13:03:55 +01:00
|
|
|
|
|
|
|
|
// ── Smooth surface pipeline (Phase 5) ────────────────────────
|
|
|
|
|
wi::renderer::LoadShader(ShaderStage::VS, smoothVS_, "voxel/voxelSmoothVS.cso");
|
|
|
|
|
wi::renderer::LoadShader(ShaderStage::PS, smoothPS_, "voxel/voxelSmoothPS.cso");
|
|
|
|
|
|
|
|
|
|
if (smoothVS_.IsValid() && smoothPS_.IsValid()) {
|
|
|
|
|
PipelineStateDesc smoothPsoDesc;
|
|
|
|
|
smoothPsoDesc.vs = &smoothVS_;
|
|
|
|
|
smoothPsoDesc.ps = &smoothPS_;
|
|
|
|
|
smoothPsoDesc.rs = wi::renderer::GetRasterizerState(wi::enums::RSTYPE_FRONT);
|
|
|
|
|
smoothPsoDesc.dss = wi::renderer::GetDepthStencilState(wi::enums::DSSTYPE_DEFAULT);
|
|
|
|
|
smoothPsoDesc.bs = wi::renderer::GetBlendState(wi::enums::BSTYPE_OPAQUE);
|
|
|
|
|
smoothPsoDesc.pt = PrimitiveTopology::TRIANGLELIST;
|
|
|
|
|
device_->CreatePipelineState(&smoothPsoDesc, &smoothPso_);
|
|
|
|
|
wi::backlog::post("VoxelRenderer: smooth surface pipeline created");
|
|
|
|
|
} else {
|
|
|
|
|
wi::backlog::post("VoxelRenderer: smooth shader loading failed", wi::backlog::LogLevel::Warning);
|
|
|
|
|
}
|
2026-03-25 14:24:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ── Procedural texture generation ───────────────────────────────
|
|
|
|
|
|
|
|
|
|
// Fills a w*h RGBA8 image (row-major, 4 bytes per texel) with procedural noise.
//
// RGB:   per texel, a blend factor t = 60% pseudo-random noise + 40% sin/cos
//        pattern interpolates each channel from (r0,g0,b0) towards (r1,g1,b1).
// Alpha: a heightmap driven by a second, independently seeded random stream
//        (used for height-based material blending); heightFreq scales the
//        pattern frequency and heightContrast scales the value before it is
//        clamped to [0,1] and stored as 0..255.
//
// `pixels` must point to at least w*h*4 writable bytes.
static void generateNoiseTexture(uint8_t* pixels, int w, int h,
    uint8_t r0, uint8_t g0, uint8_t b0,
    uint8_t r1, uint8_t g1, uint8_t b1,
    uint32_t seed, float heightFreq = 1.0f, float heightContrast = 1.0f)
{
    // One step of the linear congruential generator shared by both streams.
    const auto advance = [](uint32_t state) -> uint32_t {
        return state * 1664525u + 1013904223u;
    };
    // Maps the low 16 bits of a generator state to [0,1].
    const auto toUnit = [](uint32_t state) -> float {
        return (float)(state & 0xFFFF) / 65535.0f;
    };
    // Interpolates one colour channel and truncates to a byte.
    const auto blend = [](uint8_t from, uint8_t to, float t) -> uint8_t {
        return (uint8_t)(from + (to - from) * t);
    };

    uint32_t colorState = seed;
    uint32_t heightState = seed * 7919u + 104729u; // separate seed for heightmap

    uint8_t* texel = pixels; // advances 4 bytes per texel, in row-major order
    for (int row = 0; row < h; ++row) {
        const float fy = (float)row / h;
        for (int col = 0; col < w; ++col, texel += 4) {
            const float fx = (float)col / w;

            // Colour channels
            colorState = advance(colorState);
            const float noise = toUnit(colorState);
            const float pattern = 0.5f + 0.5f * std::sin(fx * 20.0f + noise * 3.0f) *
                std::cos(fy * 20.0f + noise * 3.0f);
            const float t = noise * 0.6f + pattern * 0.4f;

            texel[0] = blend(r0, r1, t);
            texel[1] = blend(g0, g1, t);
            texel[2] = blend(b0, b1, t);

            // Heightmap in alpha: separate noise for height-based material blending
            heightState = advance(heightState);
            const float hn = toUnit(heightState);
            const float hPattern = 0.5f + 0.5f * std::sin(fx * 12.0f * heightFreq + hn * 2.0f) *
                std::cos(fy * 12.0f * heightFreq + hn * 2.0f);
            float heightVal = hn * 0.5f + hPattern * 0.5f;
            heightVal = std::clamp(heightVal * heightContrast, 0.0f, 1.0f);
            texel[3] = (uint8_t)(heightVal * 255.0f);
        }
    }
}
|
|
|
|
|
|
|
|
|
|
void VoxelRenderer::generateTextures() {
|
|
|
|
|
const int TEX_SIZE = 256;
|
Phase 5.1: Naive Surface Nets smooth rendering
Implement CPU-side Naive Surface Nets for smooth voxel surfaces (SmoothStone,
Snow) coexisting with blocky voxels (Grass, Dirt, Stone, Sand).
Key features:
- SmoothMesher with binary SDF, centroid vertex placement, per-axis boundary
clamping to align with blocky grid at smooth↔blocky transitions
- Cross-chunk connectivity: PAD=2 SDF grid, vertex range [-1, CHUNK_SIZE),
canonical edge ownership (no duplicate triangles, no z-fighting)
- Face normals oriented by edge axis+sign (robust with binary SDF, unlike
SDF gradient dot or centroid sampling approaches)
- Y-axis winding fix: sharing cells have different spatial arrangement,
requiring opposite winding from X and Z axes
- GPU mesher treats smooth neighbors as solid (no blocky faces toward smooth)
- Material blending: primary (smooth-only) + secondary (all counts) per vertex
- Dedicated shaders: voxelSmoothVS (vertex pulling t6) + voxelSmoothPS
(triplanar + lerp blending between two materials)
- Separate render pass with LoadOp::LOAD after voxels+topings
- New materials: SmoothStone (mat 6), blocky Stone (mat 3) and Dirt patches
added to world generation for boundary testing
2026-03-27 13:03:55 +01:00
|
|
|
const int NUM_MATERIALS = 6;
|
2026-03-25 14:24:05 +01:00
|
|
|
|
|
|
|
|
std::vector<uint8_t> allPixels(TEX_SIZE * TEX_SIZE * 4 * NUM_MATERIALS);
|
|
|
|
|
|
2026-03-26 12:14:08 +01:00
|
|
|
struct MatColor {
|
|
|
|
|
uint8_t r0,g0,b0, r1,g1,b1;
|
|
|
|
|
uint32_t seed;
|
|
|
|
|
float heightFreq; // heightmap noise frequency
|
|
|
|
|
float heightContrast; // heightmap contrast (higher = more defined peaks)
|
|
|
|
|
};
|
2026-03-25 14:24:05 +01:00
|
|
|
MatColor colors[NUM_MATERIALS] = {
|
Phase 5.1: Naive Surface Nets smooth rendering
Implement CPU-side Naive Surface Nets for smooth voxel surfaces (SmoothStone,
Snow) coexisting with blocky voxels (Grass, Dirt, Stone, Sand).
Key features:
- SmoothMesher with binary SDF, centroid vertex placement, per-axis boundary
clamping to align with blocky grid at smooth↔blocky transitions
- Cross-chunk connectivity: PAD=2 SDF grid, vertex range [-1, CHUNK_SIZE),
canonical edge ownership (no duplicate triangles, no z-fighting)
- Face normals oriented by edge axis+sign (robust with binary SDF, unlike
SDF gradient dot or centroid sampling approaches)
- Y-axis winding fix: sharing cells have different spatial arrangement,
requiring opposite winding from X and Z axes
- GPU mesher treats smooth neighbors as solid (no blocky faces toward smooth)
- Material blending: primary (smooth-only) + secondary (all counts) per vertex
- Dedicated shaders: voxelSmoothVS (vertex pulling t6) + voxelSmoothPS
(triplanar + lerp blending between two materials)
- Separate render pass with LoadOp::LOAD after voxels+topings
- New materials: SmoothStone (mat 6), blocky Stone (mat 3) and Dirt patches
added to world generation for boundary testing
2026-03-27 13:03:55 +01:00
|
|
|
{ 60, 140, 40, 80, 180, 60, 101, 1.5f, 0.8f }, // 1: Grass: medium bumps
|
|
|
|
|
{ 100, 70, 40, 140, 100, 60, 202, 0.8f, 0.6f }, // 2: Dirt: smooth mounds
|
|
|
|
|
{ 80, 80, 90, 120, 120, 130, 303, 2.5f, 0.5f }, // 3: Stone (blocky): darker blue-gray
|
|
|
|
|
{ 220, 200, 130, 245, 230, 160, 404, 3.0f, 0.4f }, // 4: Sand: warmer yellow, fine
|
|
|
|
|
{ 220, 225, 230, 245, 248, 252, 505, 1.0f, 0.5f }, // 5: Snow: smooth, soft
|
|
|
|
|
{ 100, 100, 110, 145, 145, 155, 606, 2.0f, 0.6f }, // 6: SmoothStone: lighter blue-gray, distinct from blocky stone
|
2026-03-25 14:24:05 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < NUM_MATERIALS; i++) {
|
|
|
|
|
auto& c = colors[i];
|
|
|
|
|
generateNoiseTexture(
|
|
|
|
|
allPixels.data() + i * TEX_SIZE * TEX_SIZE * 4,
|
|
|
|
|
TEX_SIZE, TEX_SIZE,
|
2026-03-26 12:14:08 +01:00
|
|
|
c.r0, c.g0, c.b0, c.r1, c.g1, c.b1, c.seed,
|
|
|
|
|
c.heightFreq, c.heightContrast
|
2026-03-25 14:24:05 +01:00
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TextureDesc texDesc;
|
|
|
|
|
texDesc.type = TextureDesc::Type::TEXTURE_2D;
|
|
|
|
|
texDesc.width = TEX_SIZE;
|
|
|
|
|
texDesc.height = TEX_SIZE;
|
|
|
|
|
texDesc.array_size = NUM_MATERIALS;
|
|
|
|
|
texDesc.mip_levels = 1;
|
|
|
|
|
texDesc.format = Format::R8G8B8A8_UNORM;
|
|
|
|
|
texDesc.bind_flags = BindFlag::SHADER_RESOURCE;
|
|
|
|
|
texDesc.usage = Usage::DEFAULT;
|
|
|
|
|
|
|
|
|
|
std::vector<SubresourceData> subData(NUM_MATERIALS);
|
|
|
|
|
for (int i = 0; i < NUM_MATERIALS; i++) {
|
|
|
|
|
subData[i].data_ptr = allPixels.data() + i * TEX_SIZE * TEX_SIZE * 4;
|
|
|
|
|
subData[i].row_pitch = TEX_SIZE * 4;
|
|
|
|
|
subData[i].slice_pitch = TEX_SIZE * TEX_SIZE * 4;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
device_->CreateTexture(&texDesc, subData.data(), &textureArray_);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ── Mega-buffer rebuild ─────────────────────────────────────────
|
|
|
|
|
// Packs all chunk quads contiguously into a single buffer.
|
|
|
|
|
// Simple strategy: full rebuild whenever any chunk is dirty.
|
|
|
|
|
|
|
|
|
|
// Rebuilds the CPU-side mega-buffer from scratch: every chunk with at least one
// quad gets a slot, a GPUChunkInfo entry and a contiguous range of
// cpuMegaQuads_. Updates chunkCount_ and totalQuads_ to match.
//
// Chunks that would exceed either GPU-side limit (MAX_CHUNKS chunk-info
// entries, MEGA_BUFFER_CAPACITY quads) are left out; one warning reporting how
// many were dropped is posted per rebuild instead of dropping them silently.
void VoxelRenderer::rebuildMegaBuffer(VoxelWorld& world) {
    cpuMegaQuads_.clear();
    chunkSlots_.clear();
    cpuChunkInfo_.clear();

    uint32_t offset = 0;
    uint32_t droppedChunks = 0;
    const float debugFlag = debugFaceColors_ ? 1.0f : 0.0f;

    world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) {
        if (chunk.quadCount == 0) return;

        // Overflow guards: chunkInfoBuffer_ is created with MAX_CHUNKS entries and
        // the mega quad buffer with MEGA_BUFFER_CAPACITY quads (see initialize()).
        const bool outOfSlots = chunkSlots_.size() >= static_cast<size_t>(MAX_CHUNKS);
        const bool outOfQuads = offset + chunk.quadCount > MEGA_BUFFER_CAPACITY;
        if (outOfSlots || outOfQuads) {
            ++droppedChunks;
            return;
        }

        ChunkSlot slot;
        slot.pos = pos;
        slot.quadOffset = offset;
        slot.quadCount = chunk.quadCount;
        chunkSlots_.push_back(slot);

        GPUChunkInfo info = {};
        // xyz: chunk origin in voxel units; w carries the debug-face-colors flag.
        info.worldPos = XMFLOAT4(
            (float)(pos.x * CHUNK_SIZE),
            (float)(pos.y * CHUNK_SIZE),
            (float)(pos.z * CHUNK_SIZE),
            debugFlag
        );
        info.quadOffset = offset;
        info.quadCount = chunk.quadCount;
        for (int f = 0; f < 6; f++) {
            info.faceOffsets[f] = chunk.faceOffsets[f];
            info.faceCounts[f] = chunk.faceCounts[f];
        }
        cpuChunkInfo_.push_back(info);

        // NOTE(review): assumes chunk.quads.size() == chunk.quadCount, otherwise the
        // recorded offsets drift from the data actually appended — confirm in the mesher.
        cpuMegaQuads_.insert(cpuMegaQuads_.end(), chunk.quads.begin(), chunk.quads.end());
        offset += chunk.quadCount;
    });

    chunkCount_ = (uint32_t)chunkSlots_.size();
    totalQuads_ = offset;

    if (droppedChunks > 0) {
        wi::backlog::post("VoxelRenderer: mega-buffer full, dropped "
            + std::to_string(droppedChunks) + " chunk(s)", wi::backlog::LogLevel::Warning);
    }
}
|
|
|
|
|
|
2026-03-26 09:05:52 +01:00
|
|
|
// Build chunkInfoBuffer without CPU meshing (for GPU mesh path)
|
|
|
|
|
// Rebuilds chunkSlots_ and cpuChunkInfo_ for the GPU mesh path without touching
// any CPU quad data. Every chunk in the world gets an entry (including chunks
// with no geometry), with quad offset/count zeroed because they are not used in
// this path. Updates chunkCount_ to the number of entries.
//
// NOTE(review): dispatchGpuMesh sizes its upload from chunkCount_ and walks
// world.forEachChunk itself, so entries here presumably must stay one-per-chunk
// and in forEachChunk order — confirm before adding any filtering.
void VoxelRenderer::rebuildChunkInfoOnly(VoxelWorld& world) {
    chunkSlots_.clear();
    cpuChunkInfo_.clear();

    const float debugFlag = debugFaceColors_ ? 1.0f : 0.0f;

    world.forEachChunk([&](const ChunkPos& pos, Chunk&) {
        ChunkSlot slot;
        slot.pos = pos;
        slot.quadOffset = 0; // not used in GPU mesh path
        slot.quadCount = 0;
        chunkSlots_.push_back(slot);

        GPUChunkInfo info = {};
        // xyz: chunk origin in voxel units; w carries the debug-face-colors flag.
        info.worldPos = XMFLOAT4(
            (float)(pos.x * CHUNK_SIZE),
            (float)(pos.y * CHUNK_SIZE),
            (float)(pos.z * CHUNK_SIZE),
            debugFlag
        );
        info.quadOffset = 0;
        info.quadCount = 0;
        cpuChunkInfo_.push_back(info);
    });

    chunkCount_ = (uint32_t)chunkSlots_.size();
}
|
|
|
|
|
|
2026-03-25 14:24:05 +01:00
|
|
|
// Per-frame mesh maintenance. Does nothing without a device.
//
// GPU mesh path (gpuMeshEnabled_ && gpuMesherAvailable_): clears the chunks'
// dirty flags and, if anything was dirty or a rebuild was requested, rebuilds
// the chunk info list and raises the flags that tell the GPU path to re-upload.
//
// CPU path (fallback): meshes all dirty chunks in parallel on the job system,
// records the elapsed time, arms the GPU benchmark if it is idle, and rebuilds
// the mega-buffer when anything changed.
void VoxelRenderer::updateMeshes(VoxelWorld& world) {
    if (!device_) return;

    // GPU mesh path: skip CPU meshing entirely, just rebuild chunk info
    if (gpuMeshEnabled_ && gpuMesherAvailable_) {
        bool sawDirty = false;
        world.forEachChunk([&sawDirty](const ChunkPos&, Chunk& chunk) {
            if (!chunk.dirty) return;
            sawDirty = true;
            chunk.dirty = false;
        });

        if (!sawDirty && !megaBufferDirty_) return;

        rebuildChunkInfoOnly(world);
        // If the fused regen+pack path already set gpuMeshDirty_, the voxel cache
        // is clean. Otherwise (e.g. initial load) request both a repack of the
        // cache and a GPU update.
        if (!gpuMeshDirty_) {
            voxelCacheDirty_ = true;
            gpuMeshDirty_ = true;
        }
        chunkInfoDirty_ = true;
        megaBufferDirty_ = false;
        return;
    }

    // CPU meshing path (fallback): gather the dirty chunks first so they can be
    // meshed in parallel.
    std::vector<Chunk*> pending;
    world.forEachChunk([&pending](const ChunkPos&, Chunk& chunk) {
        if (chunk.dirty) pending.push_back(&chunk);
    });
    const bool hasWork = !pending.empty();

    if (hasWork) {
        // Parallel CPU greedy meshing via wi::jobsystem, one job per chunk.
        const auto meshBegin = std::chrono::high_resolution_clock::now();
        wi::jobsystem::context ctx;
        wi::jobsystem::Dispatch(ctx, (uint32_t)pending.size(), 1,
            [&pending, &world](wi::jobsystem::JobArgs args) {
                VoxelMesher::meshChunk(*pending[args.jobIndex], world);
            });
        wi::jobsystem::Wait(ctx);
        const auto meshEnd = std::chrono::high_resolution_clock::now();

        cpuMeshTimeMs_ = std::chrono::duration<float, std::milli>(meshEnd - meshBegin).count();

        // Trigger GPU benchmark on next render frame
        if (gpuMesherAvailable_ && benchState_ == BenchState::IDLE) {
            benchState_ = BenchState::DISPATCH;
        }
    }

    if (hasWork || megaBufferDirty_) {
        rebuildMegaBuffer(world);
        megaBufferDirty_ = false;
    }
}
|
|
|
|
|
|
2026-03-25 22:51:22 +01:00
|
|
|
// ── GPU Mesh Benchmark (Phase 2.4) ──────────────────────────────
|
|
|
|
|
// Dispatches the baseline 1x1 GPU mesher for ALL chunks and measures timing.
|
|
|
|
|
// State machine: DISPATCH (frame N) → READBACK (frame N+1) → DONE.
|
|
|
|
|
|
|
|
|
|
// Records the GPU mesher benchmark on `cmd`: zeroes the quad counter, then for
// every chunk in `world` packs its voxels, uploads them to the single-chunk
// voxel buffer and dispatches the mesh compute shader. The whole loop is
// bracketed by the TS_MESH_BEGIN / TS_MESH_END timestamps. Afterwards the quad
// counter is copied to meshCounterReadback_ and the two timestamps are resolved
// into timestampReadback_. Moves benchState_ to READBACK.
void VoxelRenderer::dispatchGpuMeshBenchmark(CommandList cmd, const VoxelWorld& world) const {
    auto* dev = device_;

    // Zero the quad counter
    uint32_t zero = 0;
    dev->UpdateBuffer(&gpuQuadCounter_, &zero, cmd, sizeof(uint32_t));

    // Barrier: COPY_DST → UAV for counter, UNDEFINED → UAV for output buffer
    GPUBarrier preBarriers[] = {
        GPUBarrier::Buffer(&gpuQuadCounter_, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS),
        GPUBarrier::Buffer(&gpuQuadBuffer_, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS),
    };
    dev->Barrier(preBarriers, 2, cmd);

    dev->BindComputeShader(&meshShader_, cmd);

    // GPU timestamp: mesh begin
    dev->QueryEnd(&timestampHeap_, TS_MESH_BEGIN, cmd);

    // Push constants layout; only chunkIndex changes between dispatches.
    struct MeshPush {
        uint32_t chunkIndex;
        uint32_t voxelBufferOffset;
        uint32_t quadBufferOffset;
        uint32_t maxOutputQuads;
        uint32_t pad[8];
    };
    MeshPush pushData = {};
    pushData.voxelBufferOffset = 0; // single-chunk buffer, always at offset 0
    pushData.quadBufferOffset = 0;  // all chunks share global atomic counter
    pushData.maxOutputQuads = MEGA_BUFFER_CAPACITY;

    // Staging storage for one packed chunk, allocated once and fully overwritten
    // for every chunk. NOTE(review): reuse relies on UpdateBuffer consuming the
    // source data at record time; the previous per-chunk vector was likewise
    // destroyed before the command list could execute — confirm against the
    // device implementation.
    std::vector<uint32_t> packed(CHUNK_VOLUME / 2);

    // Dispatch for each chunk
    uint32_t chunkIdx = 0;
    world.forEachChunk([&](const ChunkPos&, const Chunk& chunk) {
        // Pack voxel data: 32^3 voxels → 16384 uint32s (2 voxels per uint):
        // even voxel in the low 16 bits, odd voxel in the high 16 bits.
        for (size_t word = 0; word < packed.size(); ++word) {
            const uint32_t lo = chunk.voxels[2 * word].packed;
            const uint32_t hi = chunk.voxels[2 * word + 1].packed;
            packed[word] = lo | (hi << 16);
        }

        // Upload voxel data (re-uses the single-chunk buffer)
        dev->UpdateBuffer(&voxelDataBuffer_, packed.data(), cmd,
            packed.size() * sizeof(uint32_t));

        // Bind resources (after BindComputeShader, so PushConstants targets compute)
        dev->BindResource(&voxelDataBuffer_, 0, cmd);
        dev->BindUAV(&gpuQuadBuffer_, 0, cmd);
        dev->BindUAV(&gpuQuadCounter_, 1, cmd);

        // Push constants for this chunk
        pushData.chunkIndex = chunkIdx;
        dev->PushConstants(&pushData, sizeof(pushData), cmd);

        // Dispatch: 32/8 = 4 groups per axis → 64 groups total
        dev->Dispatch(4, 4, 4, cmd);

        chunkIdx++;
    });

    // GPU timestamp: mesh end
    dev->QueryEnd(&timestampHeap_, TS_MESH_END, cmd);

    // Copy quad counter to readback buffer
    GPUBarrier postBarrier = GPUBarrier::Buffer(
        &gpuQuadCounter_, ResourceState::UNORDERED_ACCESS, ResourceState::COPY_SRC);
    dev->Barrier(&postBarrier, 1, cmd);
    dev->CopyBuffer(&meshCounterReadback_, 0, &gpuQuadCounter_, 0, sizeof(uint32_t), cmd);

    // Resolve timestamps: two consecutive queries starting at TS_MESH_BEGIN,
    // written at the matching uint64 slot of the readback buffer.
    dev->QueryResolve(&timestampHeap_, TS_MESH_BEGIN, 2, &timestampReadback_,
        TS_MESH_BEGIN * sizeof(uint64_t), cmd);

    benchState_ = BenchState::READBACK;
}
|
|
|
|
|
|
|
|
|
|
void VoxelRenderer::readbackGpuMeshBenchmark() const {
|
|
|
|
|
// Read quad count from readback buffer
|
|
|
|
|
uint32_t* countData = (uint32_t*)meshCounterReadback_.mapped_data;
|
|
|
|
|
if (countData) {
|
|
|
|
|
gpuBaselineQuads_ = *countData;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Read GPU mesh timestamps
|
|
|
|
|
uint64_t* tsData = (uint64_t*)timestampReadback_.mapped_data;
|
|
|
|
|
if (tsData) {
|
|
|
|
|
double freq = (double)device_->GetTimestampFrequency();
|
|
|
|
|
if (freq > 0.0 && tsData[TS_MESH_END] > tsData[TS_MESH_BEGIN]) {
|
|
|
|
|
gpuMeshTimeMs_ = (float)((double)(tsData[TS_MESH_END] - tsData[TS_MESH_BEGIN]) / freq * 1000.0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Log benchmark results
|
|
|
|
|
char msg[256];
|
|
|
|
|
snprintf(msg, sizeof(msg),
|
|
|
|
|
"=== MESH BENCHMARK ===\n"
|
|
|
|
|
" CPU greedy: %.2f ms, %u quads (%u chunks)\n"
|
|
|
|
|
" GPU baseline: %.3f ms, %u quads (1x1, no merge)\n"
|
|
|
|
|
" Ratio quads: %.1fx more (GPU baseline vs CPU greedy)",
|
|
|
|
|
cpuMeshTimeMs_, totalQuads_, chunkCount_,
|
|
|
|
|
gpuMeshTimeMs_, gpuBaselineQuads_,
|
|
|
|
|
totalQuads_ > 0 ? (float)gpuBaselineQuads_ / totalQuads_ : 0.0f);
|
|
|
|
|
wi::backlog::post(msg);
|
|
|
|
|
|
|
|
|
|
benchState_ = BenchState::DONE;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-26 09:05:52 +01:00
|
|
|
// ── GPU Mesh Dispatch (production path) ─────────────────────────
|
|
|
|
|
// Dispatches GPU mesher for ALL chunks every frame. Replaces CPU greedy meshing.
|
|
|
|
|
// Uses the atomic quad counter for 1-frame-delayed readback of total quad count.
|
|
|
|
|
|
|
|
|
|
void VoxelRenderer::dispatchGpuMesh(CommandList cmd, const VoxelWorld& world,
|
|
|
|
|
ProfileAccum* profPack, ProfileAccum* profUpload, ProfileAccum* profDispatch) const {
|
|
|
|
|
auto* dev = device_;
|
|
|
|
|
|
|
|
|
|
// Zero the quad counter
|
|
|
|
|
uint32_t zero = 0;
|
|
|
|
|
dev->UpdateBuffer(&gpuQuadCounter_, &zero, cmd, sizeof(uint32_t));
|
|
|
|
|
|
|
|
|
|
// Barrier: COPY_DST → UAV for counter, UNDEFINED → UAV for output buffer
|
|
|
|
|
GPUBarrier preBarriers[] = {
|
|
|
|
|
GPUBarrier::Buffer(&gpuQuadCounter_, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS),
|
|
|
|
|
GPUBarrier::Buffer(&gpuQuadBuffer_, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS),
|
|
|
|
|
};
|
|
|
|
|
dev->Barrier(preBarriers, 2, cmd);
|
|
|
|
|
|
|
|
|
|
dev->BindComputeShader(&meshShader_, cmd);
|
|
|
|
|
|
|
|
|
|
// Pack and upload all chunks' voxel data
|
|
|
|
|
// Each chunk = 32^3/2 = 16384 uint32 (two voxels per uint)
|
|
|
|
|
const uint32_t wordsPerChunk = CHUNK_VOLUME / 2;
|
|
|
|
|
uint32_t totalWords = chunkCount_ * wordsPerChunk;
|
|
|
|
|
|
|
|
|
|
// Resize voxel data buffer if needed
|
|
|
|
|
if (totalWords > voxelDataCapacity_) {
|
|
|
|
|
voxelDataCapacity_ = totalWords;
|
|
|
|
|
GPUBufferDesc voxDesc;
|
|
|
|
|
voxDesc.size = totalWords * sizeof(uint32_t);
|
|
|
|
|
voxDesc.bind_flags = BindFlag::SHADER_RESOURCE;
|
|
|
|
|
voxDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
|
|
|
|
|
voxDesc.stride = sizeof(uint32_t);
|
|
|
|
|
voxDesc.usage = Usage::DEFAULT;
|
|
|
|
|
dev->CreateBuffer(&voxDesc, nullptr, const_cast<GPUBuffer*>(&voxelDataBuffer_));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Pack voxel data — use cached copy, only update when dirty.
|
|
|
|
|
// VoxelData is exactly uint16_t, so voxels[] is a packed uint16 array.
|
|
|
|
|
// Two consecutive uint16 = one uint32 → direct memcpy, no bit manipulation.
|
|
|
|
|
static_assert(sizeof(VoxelData) == sizeof(uint16_t),
|
|
|
|
|
"VoxelData must be 2 bytes for direct memcpy to GPU buffer");
|
|
|
|
|
|
|
|
|
|
auto tPack0 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
if (voxelCacheDirty_) {
|
|
|
|
|
packedVoxelCache_.resize(totalWords);
|
|
|
|
|
uint32_t chunkI = 0;
|
|
|
|
|
world.forEachChunk([&](const ChunkPos& pos, const Chunk& chunk) {
|
|
|
|
|
std::memcpy(
|
|
|
|
|
packedVoxelCache_.data() + chunkI * wordsPerChunk,
|
|
|
|
|
chunk.voxels,
|
|
|
|
|
wordsPerChunk * sizeof(uint32_t) // = CHUNK_VOLUME * 2 bytes
|
|
|
|
|
);
|
|
|
|
|
chunkI++;
|
|
|
|
|
});
|
|
|
|
|
voxelCacheDirty_ = false;
|
|
|
|
|
}
|
|
|
|
|
auto tPack1 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
if (profPack) profPack->add(std::chrono::duration<float, std::milli>(tPack1 - tPack0).count());
|
|
|
|
|
|
|
|
|
|
// Upload all voxel data at once
|
|
|
|
|
auto tUpload0 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
dev->UpdateBuffer(&voxelDataBuffer_, packedVoxelCache_.data(), cmd,
|
|
|
|
|
totalWords * sizeof(uint32_t));
|
|
|
|
|
auto tUpload1 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
if (profUpload) profUpload->add(std::chrono::duration<float, std::milli>(tUpload1 - tUpload0).count());
|
|
|
|
|
|
|
|
|
|
// Bind resources (shared across all chunk dispatches)
|
|
|
|
|
dev->BindResource(&voxelDataBuffer_, 0, cmd);
|
|
|
|
|
dev->BindUAV(&gpuQuadBuffer_, 0, cmd);
|
|
|
|
|
dev->BindUAV(&gpuQuadCounter_, 1, cmd);
|
|
|
|
|
|
|
|
|
|
// Dispatch for each chunk
|
|
|
|
|
struct MeshPush {
|
|
|
|
|
uint32_t chunkIndex;
|
|
|
|
|
uint32_t voxelBufferOffset;
|
|
|
|
|
uint32_t quadBufferOffset;
|
|
|
|
|
uint32_t maxOutputQuads;
|
|
|
|
|
uint32_t pad[8];
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
auto tDisp0 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
uint32_t chunkIdx = 0;
|
|
|
|
|
world.forEachChunk([&](const ChunkPos& pos, const Chunk& chunk) {
|
|
|
|
|
MeshPush pushData = {};
|
|
|
|
|
pushData.chunkIndex = chunkIdx;
|
|
|
|
|
pushData.voxelBufferOffset = chunkIdx * wordsPerChunk;
|
|
|
|
|
pushData.quadBufferOffset = 0; // global atomic counter handles offsets
|
|
|
|
|
pushData.maxOutputQuads = MEGA_BUFFER_CAPACITY;
|
|
|
|
|
dev->PushConstants(&pushData, sizeof(pushData), cmd);
|
|
|
|
|
|
|
|
|
|
// Dispatch: 32/8 = 4 groups per axis → 64 groups per chunk
|
|
|
|
|
dev->Dispatch(4, 4, 4, cmd);
|
|
|
|
|
chunkIdx++;
|
|
|
|
|
});
|
|
|
|
|
auto tDisp1 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
if (profDispatch) profDispatch->add(std::chrono::duration<float, std::milli>(tDisp1 - tDisp0).count());
|
|
|
|
|
|
|
|
|
|
// Barriers: UAV → COPY_SRC for counter readback, UAV → SRV for quad buffer (rendering)
|
|
|
|
|
GPUBarrier postBarriers[] = {
|
|
|
|
|
GPUBarrier::Buffer(&gpuQuadCounter_, ResourceState::UNORDERED_ACCESS, ResourceState::COPY_SRC),
|
|
|
|
|
GPUBarrier::Buffer(&gpuQuadBuffer_, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE),
|
|
|
|
|
};
|
|
|
|
|
dev->Barrier(postBarriers, 2, cmd);
|
|
|
|
|
|
|
|
|
|
// Copy quad counter to readback buffer (result available next frame)
|
|
|
|
|
dev->CopyBuffer(&meshCounterReadback_, 0, &gpuQuadCounter_, 0, sizeof(uint32_t), cmd);
|
|
|
|
|
|
|
|
|
|
totalQuads_ = gpuMeshQuadCount_; // display previous frame's count in HUD
|
|
|
|
|
gpuMeshDirty_ = false;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-25 14:50:55 +01:00
|
|
|
// ── Frustum plane extraction (Gribb-Hartmann method) ────────────
|
|
|
|
|
static void extractFrustumPlanes(const XMMATRIX& vp, XMFLOAT4 planes[6]) {
|
|
|
|
|
XMFLOAT4X4 m;
|
|
|
|
|
XMStoreFloat4x4(&m, vp);
|
|
|
|
|
|
|
|
|
|
// Left
|
|
|
|
|
planes[0] = XMFLOAT4(m._14 + m._11, m._24 + m._21, m._34 + m._31, m._44 + m._41);
|
|
|
|
|
// Right
|
|
|
|
|
planes[1] = XMFLOAT4(m._14 - m._11, m._24 - m._21, m._34 - m._31, m._44 - m._41);
|
|
|
|
|
// Bottom
|
|
|
|
|
planes[2] = XMFLOAT4(m._14 + m._12, m._24 + m._22, m._34 + m._32, m._44 + m._42);
|
|
|
|
|
// Top
|
|
|
|
|
planes[3] = XMFLOAT4(m._14 - m._12, m._24 - m._22, m._34 - m._32, m._44 - m._42);
|
|
|
|
|
// Near
|
|
|
|
|
planes[4] = XMFLOAT4(m._13, m._23, m._33, m._43);
|
|
|
|
|
// Far
|
|
|
|
|
planes[5] = XMFLOAT4(m._14 - m._13, m._24 - m._23, m._34 - m._33, m._44 - m._43);
|
|
|
|
|
|
|
|
|
|
// Normalize each plane
|
|
|
|
|
for (int i = 0; i < 6; i++) {
|
|
|
|
|
float len = std::sqrt(planes[i].x * planes[i].x +
|
|
|
|
|
planes[i].y * planes[i].y +
|
|
|
|
|
planes[i].z * planes[i].z);
|
|
|
|
|
if (len > 0.0001f) {
|
|
|
|
|
planes[i].x /= len;
|
|
|
|
|
planes[i].y /= len;
|
|
|
|
|
planes[i].z /= len;
|
|
|
|
|
planes[i].w /= len;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-25 14:24:05 +01:00
|
|
|
// ── Render pass ─────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
void VoxelRenderer::render(
|
|
|
|
|
CommandList cmd,
|
|
|
|
|
const wi::scene::CameraComponent& camera,
|
|
|
|
|
const Texture& depthBuffer,
|
|
|
|
|
const Texture& renderTarget
|
|
|
|
|
) const {
|
|
|
|
|
if (!initialized_ || chunkCount_ == 0 || !pso_.IsValid()) return;
|
|
|
|
|
|
|
|
|
|
auto* dev = device_;
|
|
|
|
|
|
2026-03-26 09:05:52 +01:00
|
|
|
// ── GPU Mesh path: quads already dispatched in Render(), just draw ──
|
|
|
|
|
if (gpuMeshEnabled_ && gpuMesherAvailable_) {
|
|
|
|
|
// Upload chunk info only when chunks changed
|
|
|
|
|
if (!cpuChunkInfo_.empty() && chunkInfoDirty_) {
|
|
|
|
|
dev->UpdateBuffer(&chunkInfoBuffer_, cpuChunkInfo_.data(), cmd,
|
|
|
|
|
cpuChunkInfo_.size() * sizeof(GPUChunkInfo));
|
|
|
|
|
chunkInfoDirty_ = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Per-frame constants
|
|
|
|
|
VoxelConstants cb = {};
|
|
|
|
|
XMMATRIX vpMatrix = camera.GetViewProjection();
|
|
|
|
|
XMStoreFloat4x4(&cb.viewProjection, vpMatrix);
|
|
|
|
|
cb.cameraPosition = XMFLOAT4(camera.Eye.x, camera.Eye.y, camera.Eye.z, 1.0f);
|
|
|
|
|
cb.sunDirection = XMFLOAT4(-0.5f, -0.8f, -0.3f, 0.0f);
|
|
|
|
|
cb.sunColor = XMFLOAT4(1.2f, 1.1f, 0.9f, 1.0f);
|
|
|
|
|
cb.chunkSize = (float)CHUNK_SIZE;
|
|
|
|
|
cb.textureTiling = 0.25f;
|
2026-03-26 12:14:08 +01:00
|
|
|
cb.blendEnabled = 1.0f; // Phase 3: PS-based blending enabled in GPU mesh path
|
|
|
|
|
cb.debugBlend = debugBlend_ ? 1.0f : 0.0f;
|
2026-03-26 09:05:52 +01:00
|
|
|
cb.chunkCount = chunkCount_;
|
2026-03-26 12:47:10 +01:00
|
|
|
// Per-material blend flags (bit N = material N):
|
|
|
|
|
// canBleed: material can overflow visually onto adjacent voxels
|
|
|
|
|
// resistBleed: adjacent materials cannot overflow onto this material
|
Phase 5.1: Naive Surface Nets smooth rendering
Implement CPU-side Naive Surface Nets for smooth voxel surfaces (SmoothStone,
Snow) coexisting with blocky voxels (Grass, Dirt, Stone, Sand).
Key features:
- SmoothMesher with binary SDF, centroid vertex placement, per-axis boundary
clamping to align with blocky grid at smooth↔blocky transitions
- Cross-chunk connectivity: PAD=2 SDF grid, vertex range [-1, CHUNK_SIZE),
canonical edge ownership (no duplicate triangles, no z-fighting)
- Face normals oriented by edge axis+sign (robust with binary SDF, unlike
SDF gradient dot or centroid sampling approaches)
- Y-axis winding fix: sharing cells have different spatial arrangement,
requiring opposite winding from X and Z axes
- GPU mesher treats smooth neighbors as solid (no blocky faces toward smooth)
- Material blending: primary (smooth-only) + secondary (all counts) per vertex
- Dedicated shaders: voxelSmoothVS (vertex pulling t6) + voxelSmoothPS
(triplanar + lerp blending between two materials)
- Separate render pass with LoadOp::LOAD after voxels+topings
- New materials: SmoothStone (mat 6), blocky Stone (mat 3) and Dirt patches
added to world generation for boundary testing
2026-03-27 13:03:55 +01:00
|
|
|
// Material IDs: 1=Grass, 2=Dirt, 3=Stone, 4=Sand, 5=Snow, 6=SmoothStone
|
|
|
|
|
cb.bleedMask = (1u << 1) | (1u << 2) | (1u << 4) | (1u << 5); // Grass, Dirt, Sand, Snow can bleed (NOT Stone/SmoothStone)
|
2026-03-26 12:47:10 +01:00
|
|
|
cb.resistBleedMask = (1u << 1); // Grass resists bleed (she bleeds onto others, not the reverse)
|
2026-03-26 18:58:19 +01:00
|
|
|
cb.windTime = windTime_;
|
2026-03-26 09:05:52 +01:00
|
|
|
dev->UpdateBuffer(&constantBuffer_, &cb, cmd, sizeof(cb));
|
|
|
|
|
|
|
|
|
|
// Render pass
|
|
|
|
|
RenderPassImage rp[] = {
|
|
|
|
|
RenderPassImage::RenderTarget(
|
|
|
|
|
&renderTarget,
|
|
|
|
|
RenderPassImage::LoadOp::CLEAR,
|
|
|
|
|
RenderPassImage::StoreOp::STORE,
|
|
|
|
|
ResourceState::SHADER_RESOURCE,
|
|
|
|
|
ResourceState::SHADER_RESOURCE
|
|
|
|
|
),
|
|
|
|
|
RenderPassImage::DepthStencil(
|
|
|
|
|
&depthBuffer,
|
|
|
|
|
RenderPassImage::LoadOp::CLEAR,
|
|
|
|
|
RenderPassImage::StoreOp::STORE,
|
|
|
|
|
ResourceState::DEPTHSTENCIL,
|
|
|
|
|
ResourceState::DEPTHSTENCIL,
|
|
|
|
|
ResourceState::DEPTHSTENCIL
|
|
|
|
|
),
|
|
|
|
|
};
|
|
|
|
|
dev->RenderPassBegin(rp, 2, cmd);
|
|
|
|
|
|
|
|
|
|
Viewport vp;
|
|
|
|
|
vp.width = (float)renderTarget.GetDesc().width;
|
|
|
|
|
vp.height = (float)renderTarget.GetDesc().height;
|
|
|
|
|
vp.min_depth = 0.0f;
|
|
|
|
|
vp.max_depth = 1.0f;
|
|
|
|
|
dev->BindViewports(1, &vp, cmd);
|
|
|
|
|
|
|
|
|
|
Rect scissor = { 0, 0, (int)vp.width, (int)vp.height };
|
|
|
|
|
dev->BindScissorRects(1, &scissor, cmd);
|
|
|
|
|
|
|
|
|
|
dev->BindPipelineState(&pso_, cmd);
|
|
|
|
|
dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
|
|
|
|
|
dev->BindResource(&gpuQuadBuffer_, 0, cmd); // GPU quads, not mega-buffer
|
|
|
|
|
dev->BindResource(&textureArray_, 1, cmd);
|
|
|
|
|
dev->BindResource(&chunkInfoBuffer_, 2, cmd);
|
2026-03-26 12:14:08 +01:00
|
|
|
dev->BindResource(&voxelDataBuffer_, 3, cmd); // Phase 3: voxel data for PS neighbor lookups
|
2026-03-26 09:05:52 +01:00
|
|
|
dev->BindSampler(&sampler_, 0, cmd);
|
|
|
|
|
|
|
|
|
|
// GPU mesh mode: flags=2, MUST be after BindPipelineState
|
|
|
|
|
struct VoxelPush {
|
|
|
|
|
uint32_t chunkIndex;
|
|
|
|
|
uint32_t quadOffset;
|
|
|
|
|
uint32_t flags;
|
|
|
|
|
uint32_t pad[9];
|
|
|
|
|
};
|
|
|
|
|
VoxelPush pushData = {};
|
|
|
|
|
pushData.flags = 2; // GPU mesh mode
|
|
|
|
|
pushData.quadOffset = 0;
|
|
|
|
|
dev->PushConstants(&pushData, sizeof(pushData), cmd);
|
|
|
|
|
|
|
|
|
|
// Draw using previous frame's quad count (1-frame delay)
|
|
|
|
|
if (gpuMeshQuadCount_ > 0) {
|
|
|
|
|
dev->DrawInstanced(gpuMeshQuadCount_ * 6, 1, 0, 0, cmd);
|
|
|
|
|
drawCalls_ = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dev->RenderPassEnd(cmd);
|
|
|
|
|
visibleChunks_ = chunkCount_;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-25 14:24:05 +01:00
|
|
|
// Upload mega-buffer and chunk info to GPU
|
|
|
|
|
if (!cpuMegaQuads_.empty()) {
|
|
|
|
|
dev->UpdateBuffer(&megaQuadBuffer_, cpuMegaQuads_.data(), cmd,
|
|
|
|
|
cpuMegaQuads_.size() * sizeof(PackedQuad));
|
|
|
|
|
}
|
|
|
|
|
if (!cpuChunkInfo_.empty()) {
|
|
|
|
|
dev->UpdateBuffer(&chunkInfoBuffer_, cpuChunkInfo_.data(), cmd,
|
|
|
|
|
cpuChunkInfo_.size() * sizeof(GPUChunkInfo));
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-25 14:50:55 +01:00
|
|
|
// Per-frame constants (with frustum planes for GPU cull shader)
|
2026-03-25 14:24:05 +01:00
|
|
|
VoxelConstants cb = {};
|
2026-03-25 14:50:55 +01:00
|
|
|
XMMATRIX vpMatrix = camera.GetViewProjection();
|
|
|
|
|
XMStoreFloat4x4(&cb.viewProjection, vpMatrix);
|
2026-03-25 14:24:05 +01:00
|
|
|
cb.cameraPosition = XMFLOAT4(camera.Eye.x, camera.Eye.y, camera.Eye.z, 1.0f);
|
|
|
|
|
cb.sunDirection = XMFLOAT4(-0.5f, -0.8f, -0.3f, 0.0f);
|
|
|
|
|
cb.sunColor = XMFLOAT4(1.2f, 1.1f, 0.9f, 1.0f);
|
|
|
|
|
cb.chunkSize = (float)CHUNK_SIZE;
|
|
|
|
|
cb.textureTiling = 0.25f;
|
2026-03-26 12:14:08 +01:00
|
|
|
cb.blendEnabled = 0.0f; // Phase 3: blending disabled in CPU/MDI paths (no voxel data SRV)
|
|
|
|
|
cb.debugBlend = 0.0f;
|
2026-03-26 12:47:10 +01:00
|
|
|
cb.bleedMask = 0;
|
|
|
|
|
cb.resistBleedMask = 0;
|
2026-03-26 18:58:19 +01:00
|
|
|
cb.windTime = windTime_;
|
2026-03-25 14:24:05 +01:00
|
|
|
cb.chunkCount = chunkCount_;
|
2026-03-25 14:50:55 +01:00
|
|
|
extractFrustumPlanes(vpMatrix, cb.frustumPlanes);
|
2026-03-25 14:24:05 +01:00
|
|
|
dev->UpdateBuffer(&constantBuffer_, &cb, cmd, sizeof(cb));
|
|
|
|
|
|
2026-03-25 14:50:55 +01:00
|
|
|
// Push constant structure (must be 48 bytes = 12 x uint32, matches b999)
|
|
|
|
|
struct VoxelPush {
|
|
|
|
|
uint32_t chunkIndex;
|
|
|
|
|
uint32_t quadOffset;
|
|
|
|
|
uint32_t flags; // bit 0: 1=MDI mode, 0=CPU mode
|
|
|
|
|
uint32_t pad[9];
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
visibleChunks_ = 0;
|
|
|
|
|
drawCalls_ = 0;
|
|
|
|
|
|
|
|
|
|
// ── GPU Cull + MDI path ────────────────────────────────────────
|
|
|
|
|
if (gpuCullingEnabled_) {
|
2026-03-25 22:30:50 +01:00
|
|
|
// DX12 buffer decay: all buffers return to COMMON after ExecuteCommandLists.
|
|
|
|
|
// So every frame starts clean — no cross-frame state tracking needed.
|
|
|
|
|
|
|
|
|
|
// Zero the draw count via UpdateBuffer (COMMON → COPY_DST implicit promotion)
|
2026-03-25 14:50:55 +01:00
|
|
|
uint32_t zero = 0;
|
|
|
|
|
dev->UpdateBuffer(&drawCountBuffer_, &zero, cmd, sizeof(uint32_t));
|
|
|
|
|
|
2026-03-25 22:30:50 +01:00
|
|
|
// Barriers to UAV for compute shader writes:
|
|
|
|
|
// - drawCountBuffer_: COPY_DST → UAV (was promoted to COPY_DST by UpdateBuffer)
|
|
|
|
|
// - indirectArgsBuffer_: COMMON → UAV (explicit, required because COMMON can't
|
|
|
|
|
// be implicitly promoted to UAV)
|
2026-03-25 14:50:55 +01:00
|
|
|
GPUBarrier preBarriers[] = {
|
|
|
|
|
GPUBarrier::Buffer(&drawCountBuffer_, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS),
|
2026-03-25 22:30:50 +01:00
|
|
|
GPUBarrier::Buffer(&indirectArgsBuffer_, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS),
|
2026-03-25 14:50:55 +01:00
|
|
|
};
|
|
|
|
|
dev->Barrier(preBarriers, 2, cmd);
|
|
|
|
|
|
|
|
|
|
// Timestamp: cull begin
|
|
|
|
|
dev->QueryEnd(×tampHeap_, TS_CULL_BEGIN, cmd);
|
|
|
|
|
|
|
|
|
|
// Dispatch GPU frustum + backface cull compute shader
|
|
|
|
|
dev->BindComputeShader(&cullShader_, cmd);
|
|
|
|
|
dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
|
|
|
|
|
dev->BindResource(&chunkInfoBuffer_, 2, cmd);
|
|
|
|
|
dev->BindUAV(&indirectArgsBuffer_, 0, cmd);
|
|
|
|
|
dev->BindUAV(&drawCountBuffer_, 1, cmd);
|
|
|
|
|
dev->Dispatch((chunkCount_ + 63) / 64, 1, 1, cmd);
|
|
|
|
|
|
|
|
|
|
// Timestamp: cull end
|
|
|
|
|
dev->QueryEnd(×tampHeap_, TS_CULL_END, cmd);
|
|
|
|
|
|
|
|
|
|
// Barriers: UAV → INDIRECT_ARGUMENT for DrawInstancedIndirectCount
|
|
|
|
|
GPUBarrier postBarriers[] = {
|
|
|
|
|
GPUBarrier::Buffer(&indirectArgsBuffer_, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT),
|
|
|
|
|
GPUBarrier::Buffer(&drawCountBuffer_, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT),
|
|
|
|
|
};
|
|
|
|
|
dev->Barrier(postBarriers, 2, cmd);
|
|
|
|
|
|
|
|
|
|
// ── Render pass ────────────────────────────────────────────
|
|
|
|
|
RenderPassImage rp[] = {
|
|
|
|
|
RenderPassImage::RenderTarget(
|
|
|
|
|
&renderTarget,
|
|
|
|
|
RenderPassImage::LoadOp::CLEAR,
|
|
|
|
|
RenderPassImage::StoreOp::STORE,
|
|
|
|
|
ResourceState::SHADER_RESOURCE,
|
|
|
|
|
ResourceState::SHADER_RESOURCE
|
|
|
|
|
),
|
|
|
|
|
RenderPassImage::DepthStencil(
|
|
|
|
|
&depthBuffer,
|
|
|
|
|
RenderPassImage::LoadOp::CLEAR,
|
|
|
|
|
RenderPassImage::StoreOp::STORE,
|
|
|
|
|
ResourceState::DEPTHSTENCIL,
|
|
|
|
|
ResourceState::DEPTHSTENCIL,
|
|
|
|
|
ResourceState::DEPTHSTENCIL
|
|
|
|
|
),
|
|
|
|
|
};
|
|
|
|
|
dev->RenderPassBegin(rp, 2, cmd);
|
|
|
|
|
|
|
|
|
|
Viewport vp;
|
|
|
|
|
vp.width = (float)renderTarget.GetDesc().width;
|
|
|
|
|
vp.height = (float)renderTarget.GetDesc().height;
|
|
|
|
|
vp.min_depth = 0.0f;
|
|
|
|
|
vp.max_depth = 1.0f;
|
|
|
|
|
dev->BindViewports(1, &vp, cmd);
|
|
|
|
|
|
|
|
|
|
Rect scissor = { 0, 0, (int)vp.width, (int)vp.height };
|
|
|
|
|
dev->BindScissorRects(1, &scissor, cmd);
|
|
|
|
|
|
|
|
|
|
dev->BindPipelineState(&pso_, cmd);
|
|
|
|
|
dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
|
|
|
|
|
dev->BindResource(&megaQuadBuffer_, 0, cmd);
|
|
|
|
|
dev->BindResource(&textureArray_, 1, cmd);
|
|
|
|
|
dev->BindResource(&chunkInfoBuffer_, 2, cmd);
|
|
|
|
|
dev->BindSampler(&sampler_, 0, cmd);
|
|
|
|
|
|
2026-03-25 22:30:50 +01:00
|
|
|
// IMPORTANT: PushConstants must be called AFTER BindPipelineState.
|
|
|
|
|
// Wicked Engine's PushConstants uses SetGraphicsRoot32BitConstants only
|
|
|
|
|
// when active_pso is set. If called before (with active_cs from compute),
|
|
|
|
|
// it would set COMPUTE push constants instead of GRAPHICS ones.
|
|
|
|
|
VoxelPush pushData = {};
|
|
|
|
|
pushData.flags = 1; // MDI mode
|
|
|
|
|
dev->PushConstants(&pushData, sizeof(pushData), cmd);
|
|
|
|
|
|
2026-03-25 14:50:55 +01:00
|
|
|
// Timestamp: draw begin
|
|
|
|
|
dev->QueryEnd(×tampHeap_, TS_DRAW_BEGIN, cmd);
|
|
|
|
|
|
|
|
|
|
// Single MDI call: GPU cull shader filled the indirect args
|
|
|
|
|
dev->DrawInstancedIndirectCount(
|
|
|
|
|
&indirectArgsBuffer_, 0,
|
|
|
|
|
&drawCountBuffer_, 0,
|
|
|
|
|
MAX_DRAWS, cmd
|
|
|
|
|
);
|
|
|
|
|
drawCalls_ = 1;
|
|
|
|
|
|
|
|
|
|
// Timestamp: draw end
|
|
|
|
|
dev->QueryEnd(×tampHeap_, TS_DRAW_END, cmd);
|
|
|
|
|
|
|
|
|
|
dev->RenderPassEnd(cmd);
|
|
|
|
|
|
|
|
|
|
// Resolve timestamps for readback (results available next frame)
|
|
|
|
|
dev->QueryResolve(×tampHeap_, 0, TS_COUNT, ×tampReadback_, 0, cmd);
|
|
|
|
|
|
|
|
|
|
// Read back previous frame's timestamps (persistently mapped READBACK buffer)
|
|
|
|
|
uint64_t* tsData = (uint64_t*)timestampReadback_.mapped_data;
|
|
|
|
|
if (tsData) {
|
|
|
|
|
double freq = (double)dev->GetTimestampFrequency();
|
|
|
|
|
if (freq > 0.0 && tsData[TS_CULL_END] > tsData[TS_CULL_BEGIN]) {
|
|
|
|
|
gpuCullTimeMs_ = (float)((double)(tsData[TS_CULL_END] - tsData[TS_CULL_BEGIN]) / freq * 1000.0);
|
|
|
|
|
}
|
|
|
|
|
if (freq > 0.0 && tsData[TS_DRAW_END] > tsData[TS_DRAW_BEGIN]) {
|
|
|
|
|
gpuDrawTimeMs_ = (float)((double)(tsData[TS_DRAW_END] - tsData[TS_DRAW_BEGIN]) / freq * 1000.0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// GPU cull handles visibility counting — approximate from chunkCount
|
|
|
|
|
visibleChunks_ = chunkCount_; // exact count would require readback of drawCount
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-25 22:07:22 +01:00
|
|
|
// ── CPU frustum + backface cull (shared by MDI and per-face paths) ──
|
2026-03-25 14:24:05 +01:00
|
|
|
wi::primitive::Frustum frustum;
|
|
|
|
|
frustum.Create(camera.GetViewProjection());
|
|
|
|
|
|
2026-03-25 22:07:22 +01:00
|
|
|
// ── Phase 2.2: CPU-filled indirect args + MDI draw ──────────────
|
|
|
|
|
if (mdiEnabled_) {
|
|
|
|
|
// CPU cull: fill indirect args with visible face groups
|
|
|
|
|
cpuIndirectArgs_.clear();
|
|
|
|
|
uint32_t cpuDrawCount = 0;
|
|
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < chunkCount_; i++) {
|
|
|
|
|
const auto& slot = chunkSlots_[i];
|
|
|
|
|
if (slot.quadCount == 0) continue;
|
|
|
|
|
|
|
|
|
|
XMFLOAT3 aabbMin(
|
|
|
|
|
(float)(slot.pos.x * CHUNK_SIZE),
|
|
|
|
|
(float)(slot.pos.y * CHUNK_SIZE),
|
|
|
|
|
(float)(slot.pos.z * CHUNK_SIZE)
|
|
|
|
|
);
|
|
|
|
|
XMFLOAT3 aabbMax(
|
|
|
|
|
aabbMin.x + CHUNK_SIZE,
|
|
|
|
|
aabbMin.y + CHUNK_SIZE,
|
|
|
|
|
aabbMin.z + CHUNK_SIZE
|
|
|
|
|
);
|
|
|
|
|
wi::primitive::AABB aabb(aabbMin, aabbMax);
|
|
|
|
|
if (!frustum.CheckBoxFast(aabb)) continue;
|
|
|
|
|
|
|
|
|
|
visibleChunks_++;
|
|
|
|
|
const auto& info = cpuChunkInfo_[i];
|
|
|
|
|
|
|
|
|
|
for (uint32_t f = 0; f < 6; f++) {
|
|
|
|
|
if (info.faceCounts[f] == 0) continue;
|
|
|
|
|
|
|
|
|
|
bool backFacing = false;
|
|
|
|
|
switch (f) {
|
|
|
|
|
case 0: backFacing = (camera.Eye.x < aabbMin.x); break;
|
|
|
|
|
case 1: backFacing = (camera.Eye.x > aabbMax.x); break;
|
|
|
|
|
case 2: backFacing = (camera.Eye.y < aabbMin.y); break;
|
|
|
|
|
case 3: backFacing = (camera.Eye.y > aabbMax.y); break;
|
|
|
|
|
case 4: backFacing = (camera.Eye.z < aabbMin.z); break;
|
|
|
|
|
case 5: backFacing = (camera.Eye.z > aabbMax.z); break;
|
|
|
|
|
}
|
|
|
|
|
if (backFacing) continue;
|
|
|
|
|
|
|
|
|
|
IndirectDrawArgs args = {};
|
|
|
|
|
// Pack chunkIndex (low 16 bits) + faceIndex (high 16 bits) into push constant.
|
|
|
|
|
// The shader unpacks this to look up quadOffset from GPUChunkInfo.
|
|
|
|
|
// We do NOT use startVertexLocation because SV_VertexID may not include it
|
|
|
|
|
// reliably in ExecuteIndirect context.
|
|
|
|
|
args.pushConstant = i | (f << 16);
|
|
|
|
|
args.vertexCountPerInstance = info.faceCounts[f] * 6;
|
|
|
|
|
args.instanceCount = 1;
|
|
|
|
|
args.startVertexLocation = 0;
|
|
|
|
|
args.startInstanceLocation = 0;
|
|
|
|
|
cpuIndirectArgs_.push_back(args);
|
|
|
|
|
cpuDrawCount++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Upload indirect args and draw count to GPU
|
|
|
|
|
// Note: no explicit barriers needed here. Buffers start in COMMON each frame
|
|
|
|
|
// (DX12 buffer decay after command list execution). COMMON is implicitly
|
|
|
|
|
// promoted to COPY_DST by UpdateBuffer, then to INDIRECT_ARGUMENT by
|
|
|
|
|
// DrawInstancedIndirectCount. This matches Phase 2.1 pattern (no barriers
|
|
|
|
|
// between UpdateBuffer and SRV usage for megaQuadBuffer_/chunkInfoBuffer_).
|
|
|
|
|
if (!cpuIndirectArgs_.empty()) {
|
|
|
|
|
dev->UpdateBuffer(&indirectArgsBuffer_, cpuIndirectArgs_.data(), cmd,
|
|
|
|
|
cpuIndirectArgs_.size() * sizeof(IndirectDrawArgs));
|
|
|
|
|
}
|
|
|
|
|
dev->UpdateBuffer(&drawCountBuffer_, &cpuDrawCount, cmd, sizeof(uint32_t));
|
|
|
|
|
|
|
|
|
|
// ── Render pass ────────────────────────────────────────────
|
|
|
|
|
RenderPassImage rp[] = {
|
|
|
|
|
RenderPassImage::RenderTarget(
|
|
|
|
|
&renderTarget,
|
|
|
|
|
RenderPassImage::LoadOp::CLEAR,
|
|
|
|
|
RenderPassImage::StoreOp::STORE,
|
|
|
|
|
ResourceState::SHADER_RESOURCE,
|
|
|
|
|
ResourceState::SHADER_RESOURCE
|
|
|
|
|
),
|
|
|
|
|
RenderPassImage::DepthStencil(
|
|
|
|
|
&depthBuffer,
|
|
|
|
|
RenderPassImage::LoadOp::CLEAR,
|
|
|
|
|
RenderPassImage::StoreOp::STORE,
|
|
|
|
|
ResourceState::DEPTHSTENCIL,
|
|
|
|
|
ResourceState::DEPTHSTENCIL,
|
|
|
|
|
ResourceState::DEPTHSTENCIL
|
|
|
|
|
),
|
|
|
|
|
};
|
|
|
|
|
dev->RenderPassBegin(rp, 2, cmd);
|
|
|
|
|
|
|
|
|
|
Viewport vp;
|
|
|
|
|
vp.width = (float)renderTarget.GetDesc().width;
|
|
|
|
|
vp.height = (float)renderTarget.GetDesc().height;
|
|
|
|
|
vp.min_depth = 0.0f;
|
|
|
|
|
vp.max_depth = 1.0f;
|
|
|
|
|
dev->BindViewports(1, &vp, cmd);
|
|
|
|
|
|
|
|
|
|
Rect scissor = { 0, 0, (int)vp.width, (int)vp.height };
|
|
|
|
|
dev->BindScissorRects(1, &scissor, cmd);
|
|
|
|
|
|
|
|
|
|
dev->BindPipelineState(&pso_, cmd);
|
|
|
|
|
dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
|
|
|
|
|
dev->BindResource(&megaQuadBuffer_, 0, cmd);
|
|
|
|
|
dev->BindResource(&textureArray_, 1, cmd);
|
|
|
|
|
dev->BindResource(&chunkInfoBuffer_, 2, cmd);
|
|
|
|
|
dev->BindSampler(&sampler_, 0, cmd);
|
|
|
|
|
|
|
|
|
|
// MDI mode: VS uses binary search to find chunk from SV_VertexID
|
|
|
|
|
VoxelPush pushData = {};
|
|
|
|
|
pushData.flags = 1; // MDI mode
|
|
|
|
|
dev->PushConstants(&pushData, sizeof(pushData), cmd);
|
|
|
|
|
|
|
|
|
|
dev->DrawInstancedIndirectCount(
|
|
|
|
|
&indirectArgsBuffer_, 0,
|
|
|
|
|
&drawCountBuffer_, 0,
|
|
|
|
|
MAX_DRAWS, cmd
|
|
|
|
|
);
|
|
|
|
|
drawCalls_ = 1;
|
|
|
|
|
|
|
|
|
|
dev->RenderPassEnd(cmd);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ── Phase 2.1 Fallback: per-face-group DrawInstanced ────────────
|
2026-03-25 14:24:05 +01:00
|
|
|
RenderPassImage rp[] = {
|
|
|
|
|
RenderPassImage::RenderTarget(
|
|
|
|
|
&renderTarget,
|
|
|
|
|
RenderPassImage::LoadOp::CLEAR,
|
|
|
|
|
RenderPassImage::StoreOp::STORE,
|
|
|
|
|
ResourceState::SHADER_RESOURCE,
|
|
|
|
|
ResourceState::SHADER_RESOURCE
|
|
|
|
|
),
|
|
|
|
|
RenderPassImage::DepthStencil(
|
|
|
|
|
&depthBuffer,
|
|
|
|
|
RenderPassImage::LoadOp::CLEAR,
|
|
|
|
|
RenderPassImage::StoreOp::STORE,
|
|
|
|
|
ResourceState::DEPTHSTENCIL,
|
|
|
|
|
ResourceState::DEPTHSTENCIL,
|
|
|
|
|
ResourceState::DEPTHSTENCIL
|
|
|
|
|
),
|
|
|
|
|
};
|
|
|
|
|
dev->RenderPassBegin(rp, 2, cmd);
|
|
|
|
|
|
|
|
|
|
Viewport vp;
|
|
|
|
|
vp.width = (float)renderTarget.GetDesc().width;
|
|
|
|
|
vp.height = (float)renderTarget.GetDesc().height;
|
|
|
|
|
vp.min_depth = 0.0f;
|
|
|
|
|
vp.max_depth = 1.0f;
|
|
|
|
|
dev->BindViewports(1, &vp, cmd);
|
|
|
|
|
|
|
|
|
|
Rect scissor = { 0, 0, (int)vp.width, (int)vp.height };
|
|
|
|
|
dev->BindScissorRects(1, &scissor, cmd);
|
|
|
|
|
|
|
|
|
|
dev->BindPipelineState(&pso_, cmd);
|
|
|
|
|
dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
|
2026-03-25 14:50:55 +01:00
|
|
|
dev->BindResource(&megaQuadBuffer_, 0, cmd);
|
|
|
|
|
dev->BindResource(&textureArray_, 1, cmd);
|
|
|
|
|
dev->BindResource(&chunkInfoBuffer_, 2, cmd);
|
2026-03-25 14:24:05 +01:00
|
|
|
dev->BindSampler(&sampler_, 0, cmd);
|
|
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < chunkCount_; i++) {
|
|
|
|
|
const auto& slot = chunkSlots_[i];
|
|
|
|
|
if (slot.quadCount == 0) continue;
|
|
|
|
|
|
|
|
|
|
XMFLOAT3 aabbMin(
|
|
|
|
|
(float)(slot.pos.x * CHUNK_SIZE),
|
|
|
|
|
(float)(slot.pos.y * CHUNK_SIZE),
|
|
|
|
|
(float)(slot.pos.z * CHUNK_SIZE)
|
|
|
|
|
);
|
|
|
|
|
XMFLOAT3 aabbMax(
|
|
|
|
|
aabbMin.x + CHUNK_SIZE,
|
|
|
|
|
aabbMin.y + CHUNK_SIZE,
|
|
|
|
|
aabbMin.z + CHUNK_SIZE
|
|
|
|
|
);
|
|
|
|
|
wi::primitive::AABB aabb(aabbMin, aabbMax);
|
|
|
|
|
if (!frustum.CheckBoxFast(aabb)) continue;
|
|
|
|
|
|
|
|
|
|
visibleChunks_++;
|
2026-03-25 14:50:55 +01:00
|
|
|
const auto& info = cpuChunkInfo_[i];
|
|
|
|
|
|
|
|
|
|
for (uint32_t f = 0; f < 6; f++) {
|
|
|
|
|
if (info.faceCounts[f] == 0) continue;
|
|
|
|
|
|
|
|
|
|
bool backFacing = false;
|
|
|
|
|
switch (f) {
|
2026-03-25 22:07:22 +01:00
|
|
|
case 0: backFacing = (camera.Eye.x < aabbMin.x); break;
|
|
|
|
|
case 1: backFacing = (camera.Eye.x > aabbMax.x); break;
|
|
|
|
|
case 2: backFacing = (camera.Eye.y < aabbMin.y); break;
|
|
|
|
|
case 3: backFacing = (camera.Eye.y > aabbMax.y); break;
|
|
|
|
|
case 4: backFacing = (camera.Eye.z < aabbMin.z); break;
|
|
|
|
|
case 5: backFacing = (camera.Eye.z > aabbMax.z); break;
|
2026-03-25 14:50:55 +01:00
|
|
|
}
|
|
|
|
|
if (backFacing) continue;
|
|
|
|
|
|
|
|
|
|
VoxelPush pushData = {};
|
|
|
|
|
pushData.chunkIndex = i;
|
|
|
|
|
pushData.quadOffset = slot.quadOffset + info.faceOffsets[f];
|
|
|
|
|
pushData.flags = 0; // CPU mode
|
|
|
|
|
dev->PushConstants(&pushData, sizeof(pushData), cmd);
|
|
|
|
|
|
|
|
|
|
dev->DrawInstanced(info.faceCounts[f] * 6, 1, 0, 0, cmd);
|
|
|
|
|
drawCalls_++;
|
|
|
|
|
}
|
2026-03-25 14:24:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dev->RenderPassEnd(cmd);
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-26 17:47:08 +01:00
|
|
|
// ── Phase 4: Toping GPU upload + rendering ─────────────────────
|
|
|
|
|
|
|
|
|
|
void VoxelRenderer::uploadTopingData(const TopingSystem& topingSystem) {
|
|
|
|
|
if (!device_ || !topingPso_.IsValid()) return;
|
|
|
|
|
|
|
|
|
|
// Upload mesh vertices (done once, meshes are static)
|
|
|
|
|
const auto& verts = topingSystem.getVertices();
|
|
|
|
|
if (!verts.empty() && !topingVertexBuffer_.IsValid()) {
|
|
|
|
|
GPUBufferDesc vbDesc;
|
|
|
|
|
vbDesc.size = verts.size() * sizeof(TopingVertex);
|
|
|
|
|
vbDesc.bind_flags = BindFlag::SHADER_RESOURCE;
|
|
|
|
|
vbDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
|
|
|
|
|
vbDesc.stride = sizeof(TopingVertex);
|
|
|
|
|
vbDesc.usage = Usage::DEFAULT;
|
|
|
|
|
device_->CreateBuffer(&vbDesc, verts.data(), &topingVertexBuffer_);
|
|
|
|
|
|
|
|
|
|
char msg[128];
|
|
|
|
|
snprintf(msg, sizeof(msg), "Toping: uploaded %zu vertices (%zu bytes)",
|
|
|
|
|
verts.size(), verts.size() * sizeof(TopingVertex));
|
|
|
|
|
wi::backlog::post(msg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Upload instance positions (re-upload when world changes)
|
|
|
|
|
const auto& instances = topingSystem.getInstances();
|
|
|
|
|
if (instances.empty()) return;
|
|
|
|
|
|
|
|
|
|
// GPU instances are just float3 (12 bytes), sorted by (type, variant) for batched draws.
|
|
|
|
|
// We sort a copy and build a draw group table.
|
|
|
|
|
struct SortedInst {
|
|
|
|
|
float wx, wy, wz;
|
|
|
|
|
uint16_t type, variant;
|
|
|
|
|
};
|
|
|
|
|
std::vector<SortedInst> sorted(instances.size());
|
|
|
|
|
for (size_t i = 0; i < instances.size(); i++) {
|
|
|
|
|
sorted[i] = { instances[i].wx, instances[i].wy, instances[i].wz,
|
|
|
|
|
instances[i].topingType, instances[i].variant };
|
|
|
|
|
}
|
|
|
|
|
std::sort(sorted.begin(), sorted.end(), [](const SortedInst& a, const SortedInst& b) {
|
|
|
|
|
if (a.type != b.type) return a.type < b.type;
|
|
|
|
|
return a.variant < b.variant;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Pack GPU instance data (just float3 positions)
|
|
|
|
|
struct GPUTopingInst { float x, y, z; };
|
|
|
|
|
uint32_t instCount = (uint32_t)std::min(sorted.size(), (size_t)MAX_TOPING_INSTANCES);
|
|
|
|
|
std::vector<GPUTopingInst> gpuInsts(instCount);
|
|
|
|
|
for (uint32_t i = 0; i < instCount; i++) {
|
|
|
|
|
gpuInsts[i] = { sorted[i].wx, sorted[i].wy, sorted[i].wz };
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create or recreate instance buffer
|
|
|
|
|
GPUBufferDesc ibDesc;
|
|
|
|
|
ibDesc.size = instCount * sizeof(GPUTopingInst);
|
|
|
|
|
ibDesc.bind_flags = BindFlag::SHADER_RESOURCE;
|
|
|
|
|
ibDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
|
|
|
|
|
ibDesc.stride = sizeof(GPUTopingInst);
|
|
|
|
|
ibDesc.usage = Usage::DEFAULT;
|
|
|
|
|
device_->CreateBuffer(&ibDesc, gpuInsts.data(), &topingInstanceBuffer_);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Draws the toping instances on top of the already rendered voxels.
///
/// The instances are grouped by (type, variant) in the same order used by
/// uploadTopingData(), and one DrawInstanced is issued per non-empty group.
/// Both attachments are loaded (not cleared), so the earlier voxel output is
/// preserved. topingDrawCalls_ receives the number of draws issued.
///
/// Returns without recording anything when the toping pipeline or buffers are
/// invalid, or when there are no instances.
///
/// @param cmd          Command list to record into.
/// @param topingSystem Source of the instances and toping definitions.
/// @param depthBuffer  Depth attachment (loaded and stored).
/// @param renderTarget Colour attachment (loaded and stored).
void VoxelRenderer::renderTopings(
    CommandList cmd,
    const TopingSystem& topingSystem,
    const Texture& depthBuffer,
    const Texture& renderTarget
) const {
    const bool resourcesReady = topingPso_.IsValid() &&
                                topingVertexBuffer_.IsValid() &&
                                topingInstanceBuffer_.IsValid();
    if (!resourcesReady) return;

    const auto& placed = topingSystem.getInstances();
    const auto& topingDefs = topingSystem.getDefs();
    if (placed.empty()) return;

    auto* dev = device_;

    // ── Rebuild the draw groups (same sort order as uploadTopingData) ──
    struct DrawGroup {
        uint16_t type, variant;
        uint32_t instanceOffset, instanceCount;
    };
    struct SortKey { uint16_t type, variant; };

    std::vector<SortKey> keys;
    keys.reserve(placed.size());
    for (const auto& inst : placed) {
        keys.push_back({ inst.topingType, inst.variant });
    }
    std::sort(keys.begin(), keys.end(), [](const SortKey& lhs, const SortKey& rhs) {
        return lhs.type < rhs.type || (lhs.type == rhs.type && lhs.variant < rhs.variant);
    });

    // Each run of equal keys becomes one group; only the instances that fit
    // within MAX_TOPING_INSTANCES are considered.
    const uint32_t usedCount =
        static_cast<uint32_t>(std::min(keys.size(), static_cast<size_t>(MAX_TOPING_INSTANCES)));
    std::vector<DrawGroup> groups;
    for (uint32_t i = 0; i < usedCount; ++i) {
        const SortKey& key = keys[i];
        const bool extendsLast = !groups.empty() &&
                                 groups.back().type == key.type &&
                                 groups.back().variant == key.variant;
        if (extendsLast) {
            groups.back().instanceCount++;
        } else {
            groups.push_back({ key.type, key.variant, i, 1 });
        }
    }

    // ── Open render pass with LOAD (preserve voxel render output) ──
    RenderPassImage attachments[] = {
        RenderPassImage::RenderTarget(
            &renderTarget,
            RenderPassImage::LoadOp::LOAD,
            RenderPassImage::StoreOp::STORE,
            ResourceState::SHADER_RESOURCE,
            ResourceState::SHADER_RESOURCE
        ),
        RenderPassImage::DepthStencil(
            &depthBuffer,
            RenderPassImage::LoadOp::LOAD,
            RenderPassImage::StoreOp::STORE,
            ResourceState::DEPTHSTENCIL,
            ResourceState::DEPTHSTENCIL,
            ResourceState::DEPTHSTENCIL
        ),
    };
    dev->RenderPassBegin(attachments, 2, cmd);

    // Viewport & scissor cover the whole render target.
    const auto& targetDesc = renderTarget.GetDesc();
    Viewport viewport;
    viewport.top_left_x = 0;
    viewport.top_left_y = 0;
    viewport.width = (float)targetDesc.width;
    viewport.height = (float)targetDesc.height;
    viewport.min_depth = 0.0f;
    viewport.max_depth = 1.0f;
    Rect scissorRect = { 0, 0, (int)targetDesc.width, (int)targetDesc.height };
    dev->BindViewports(1, &viewport, cmd);
    dev->BindScissorRects(1, &scissorRect, cmd);

    // Bind toping pipeline (MUST be before PushConstants!)
    dev->BindPipelineState(&topingPso_, cmd);
    dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
    dev->BindResource(&textureArray_, 1, cmd);
    dev->BindResource(&topingVertexBuffer_, 4, cmd);    // t4
    dev->BindResource(&topingInstanceBuffer_, 5, cmd);  // t5
    dev->BindSampler(&sampler_, 0, cmd);

    // ── One DrawInstanced per group ──
    struct TopingPush {
        uint32_t vertexOffset;
        uint32_t instanceOffset;
        uint32_t materialID;
        uint32_t pad[9];
    };

    topingDrawCalls_ = 0;
    for (const DrawGroup& group : groups) {
        if (!(group.type < topingDefs.size())) continue;  // no definition for this type

        const TopingDef& def = topingDefs[group.type];
        const MeshSlice& mesh = def.variants[group.variant];
        if (mesh.count == 0) continue;  // empty mesh (all neighbors present)

        TopingPush push = {};
        push.vertexOffset = mesh.offset;
        push.instanceOffset = group.instanceOffset;
        push.materialID = def.materialID;
        dev->PushConstants(&push, sizeof(push), cmd);

        dev->DrawInstanced(mesh.count, group.instanceCount, 0, 0, cmd);
        topingDrawCalls_++;
    }

    dev->RenderPassEnd(cmd);
}
|
|
|
|
|
|
Phase 5.1: Naive Surface Nets smooth rendering
Implement CPU-side Naive Surface Nets for smooth voxel surfaces (SmoothStone,
Snow) coexisting with blocky voxels (Grass, Dirt, Stone, Sand).
Key features:
- SmoothMesher with binary SDF, centroid vertex placement, per-axis boundary
clamping to align with blocky grid at smooth↔blocky transitions
- Cross-chunk connectivity: PAD=2 SDF grid, vertex range [-1, CHUNK_SIZE),
canonical edge ownership (no duplicate triangles, no z-fighting)
- Face normals oriented by edge axis+sign (robust with binary SDF, unlike
SDF gradient dot or centroid sampling approaches)
- Y-axis winding fix: sharing cells have different spatial arrangement,
requiring opposite winding from X and Z axes
- GPU mesher treats smooth neighbors as solid (no blocky faces toward smooth)
- Material blending: primary (smooth-only) + secondary (all counts) per vertex
- Dedicated shaders: voxelSmoothVS (vertex pulling t6) + voxelSmoothPS
(triplanar + lerp blending between two materials)
- Separate render pass with LoadOp::LOAD after voxels+topings
- New materials: SmoothStone (mat 6), blocky Stone (mat 3) and Dirt patches
added to world generation for boundary testing
2026-03-27 13:03:55 +01:00
|
|
|
// ── Phase 5: Smooth Surface Nets upload + rendering ─────────────
|
|
|
|
|
|
|
|
|
|
/// Gathers the smooth-surface vertices of every chunk in `world` and uploads
/// them as one structured GPU buffer (`smoothVertexBuffer_`).
///
/// Each vertex is stamped with the running index of its chunk in
/// `forEachChunk` order before it is copied. At most `MAX_SMOOTH_VERTICES`
/// vertices are uploaded; any excess is dropped and reported in the backlog.
/// `smoothDirty_` is cleared on every path past the initial guard.
///
/// @param world  World whose chunks are visited; the `chunkIndex` field of
///               each chunk's smooth vertices is overwritten in place.
void VoxelRenderer::uploadSmoothData(VoxelWorld& world) {
    if (!device_ || !smoothPso_.IsValid()) return;

    // Collect all smooth vertices from all chunks, stamping each with its chunkIndex.
    // The chunkIndex must match the order in chunkInfoBuffer_ (assigned by forEachChunk).
    std::vector<SmoothVertex> allVerts;
    allVerts.reserve(64 * 1024);

    uint32_t chunkIdx = 0;
    world.forEachChunk([&](const ChunkPos&, Chunk& chunk) {
        if (chunk.hasSmooth && chunk.smoothVertexCount > 0) {
            for (auto& sv : chunk.smoothVertices) {
                sv.chunkIndex = static_cast<uint16_t>(chunkIdx);
            }
            allVerts.insert(allVerts.end(),
                            chunk.smoothVertices.begin(),
                            chunk.smoothVertices.end());
        }
        // Chunks without smooth geometry still consume an index.
        chunkIdx++;
    });

    const size_t gathered = allVerts.size();
    smoothVertexCount_ = static_cast<uint32_t>(
        std::min(gathered, static_cast<size_t>(MAX_SMOOTH_VERTICES)));

    // Make truncation visible instead of silently dropping geometry.
    if (gathered > smoothVertexCount_) {
        char warn[160];
        snprintf(warn, sizeof(warn),
                 "Smooth: %zu vertices exceed the limit of %u, extra vertices dropped",
                 gathered, smoothVertexCount_);
        wi::backlog::post(warn, wi::backlog::LogLevel::Warning);
    }

    if (smoothVertexCount_ == 0) {
        smoothDirty_ = false;
        return;
    }

    // Create or recreate vertex buffer (only the first smoothVertexCount_
    // entries of allVerts are read, since the size is derived from that count).
    GPUBufferDesc vbDesc;
    vbDesc.size = smoothVertexCount_ * sizeof(SmoothVertex);
    vbDesc.bind_flags = BindFlag::SHADER_RESOURCE;
    vbDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
    vbDesc.stride = sizeof(SmoothVertex);
    vbDesc.usage = Usage::DEFAULT;
    const bool created =
        device_->CreateBuffer(&vbDesc, allVerts.data(), &smoothVertexBuffer_);

    smoothDirty_ = false;

    // Do not report success (or leave a drawable vertex count behind) when
    // the device could not provide the buffer.
    if (!created || !smoothVertexBuffer_.IsValid()) {
        smoothVertexCount_ = 0;
        wi::backlog::post("Smooth: vertex buffer creation failed",
                          wi::backlog::LogLevel::Error);
        return;
    }

    char msg[128];
    snprintf(msg, sizeof(msg), "Smooth: uploaded %u vertices (%u triangles, %.1f KB)",
             smoothVertexCount_, smoothVertexCount_ / 3,
             smoothVertexCount_ * sizeof(SmoothVertex) / 1024.0f);
    wi::backlog::post(msg);
}
|
|
|
|
|
|
|
|
|
|
/// Draws all uploaded smooth-surface vertices in a dedicated render pass.
///
/// Both attachments are opened with LoadOp::LOAD so whatever was rendered
/// into them earlier is kept. Returns without recording anything when the
/// smooth pipeline or vertex buffer is invalid or there are no vertices.
///
/// @param cmd           Command list to record into.
/// @param depthBuffer   Depth attachment (loaded and stored).
/// @param renderTarget  Color attachment (loaded and stored); its size also
///                      defines the viewport and scissor rectangle.
void VoxelRenderer::renderSmooth(
    CommandList cmd,
    const Texture& depthBuffer,
    const Texture& renderTarget
) const {
    const bool nothingToDraw = !smoothPso_.IsValid()
                            || !smoothVertexBuffer_.IsValid()
                            || smoothVertexCount_ == 0;
    if (nothingToDraw) return;

    auto* dev = device_;

    // Open render pass with LOAD (preserve voxel + toping render output)
    RenderPassImage attachments[] = {
        RenderPassImage::RenderTarget(
            &renderTarget,
            RenderPassImage::LoadOp::LOAD,
            RenderPassImage::StoreOp::STORE,
            ResourceState::SHADER_RESOURCE,
            ResourceState::SHADER_RESOURCE
        ),
        RenderPassImage::DepthStencil(
            &depthBuffer,
            RenderPassImage::LoadOp::LOAD,
            RenderPassImage::StoreOp::STORE,
            ResourceState::DEPTHSTENCIL,
            ResourceState::DEPTHSTENCIL,
            ResourceState::DEPTHSTENCIL
        ),
    };
    dev->RenderPassBegin(attachments, 2, cmd);

    // Full-target viewport & scissor
    const auto& targetDesc = renderTarget.GetDesc();

    Viewport viewport;
    viewport.top_left_x = 0;
    viewport.top_left_y = 0;
    viewport.width = static_cast<float>(targetDesc.width);
    viewport.height = static_cast<float>(targetDesc.height);
    viewport.min_depth = 0.0f;
    viewport.max_depth = 1.0f;

    Rect fullRect = { 0, 0, static_cast<int>(targetDesc.width), static_cast<int>(targetDesc.height) };

    dev->BindViewports(1, &viewport, cmd);
    dev->BindScissorRects(1, &fullRect, cmd);

    // Bind smooth pipeline (MUST be before PushConstants!)
    dev->BindPipelineState(&smoothPso_, cmd);
    dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
    dev->BindResource(&textureArray_, 1, cmd);
    dev->BindResource(&chunkInfoBuffer_, 2, cmd);    // t2: chunk info for PS voxel lookups
    dev->BindResource(&voxelDataBuffer_, 3, cmd);    // t3: voxel data for PS neighbor blending
    dev->BindResource(&smoothVertexBuffer_, 6, cmd); // t6: smooth vertices
    dev->BindSampler(&sampler_, 0, cmd);

    // Push constants (unused by smooth VS, but must be valid 48 bytes):
    // twelve zeroed 32-bit words.
    const uint32_t zeroedPush[12] = {};
    dev->PushConstants(zeroedPush, sizeof(zeroedPush), cmd);

    // Single draw call for all smooth vertices
    dev->DrawInstanced(smoothVertexCount_, 1, 0, 0, cmd);
    smoothDrawCalls_ = 1;

    dev->RenderPassEnd(cmd);
}
|
|
|
|
|
|
2026-03-25 14:24:05 +01:00
|
|
|
// ── VoxelRenderPath (custom RenderPath3D) ───────────────────────
|
|
|
|
|
|
|
|
|
|
void VoxelRenderPath::Start() {
|
|
|
|
|
RenderPath3D::Start();
|
|
|
|
|
|
|
|
|
|
auto* device = wi::graphics::GetDevice();
|
|
|
|
|
renderer.initialize(device);
|
|
|
|
|
renderer.debugFaceColors_ = debugMode;
|
|
|
|
|
|
|
|
|
|
// Generate world
|
2026-03-27 14:21:35 +01:00
|
|
|
if (debugSmooth) {
|
|
|
|
|
world.generateDebugSmooth();
|
|
|
|
|
cameraPos = { 15.0f, 12.0f, -5.0f };
|
|
|
|
|
cameraPitch = -0.5f;
|
|
|
|
|
cameraYaw = 0.8f;
|
|
|
|
|
} else if (debugMode) {
|
2026-03-25 14:24:05 +01:00
|
|
|
world.generateDebug();
|
|
|
|
|
cameraPos = { 10.0f, 10.0f, 0.0f };
|
|
|
|
|
cameraPitch = -0.4f;
|
|
|
|
|
cameraYaw = 0.5f;
|
|
|
|
|
} else {
|
|
|
|
|
world.generateAround(cameraPos.x, cameraPos.y, cameraPos.z, 4);
|
|
|
|
|
}
|
|
|
|
|
if (renderer.isInitialized()) {
|
|
|
|
|
renderer.updateMeshes(world);
|
|
|
|
|
}
|
2026-03-26 15:27:15 +01:00
|
|
|
|
2026-03-26 17:47:08 +01:00
|
|
|
// Phase 4: Initialize toping system, collect instances, upload to GPU
|
2026-03-26 15:27:15 +01:00
|
|
|
topingSystem.initialize();
|
|
|
|
|
topingSystem.collectInstances(world);
|
2026-03-26 17:47:08 +01:00
|
|
|
if (renderer.isInitialized()) {
|
|
|
|
|
renderer.uploadTopingData(topingSystem);
|
|
|
|
|
}
|
2026-03-26 15:27:15 +01:00
|
|
|
{
|
|
|
|
|
char msg[256];
|
|
|
|
|
snprintf(msg, sizeof(msg),
|
|
|
|
|
"TopingSystem: %zu defs, %zu vertices, %zu instances",
|
|
|
|
|
topingSystem.getDefCount(),
|
|
|
|
|
topingSystem.getVertexCount(),
|
|
|
|
|
topingSystem.getInstanceCount());
|
|
|
|
|
wi::backlog::post(msg);
|
|
|
|
|
}
|
|
|
|
|
|
Phase 5.1: Naive Surface Nets smooth rendering
Implement CPU-side Naive Surface Nets for smooth voxel surfaces (SmoothStone,
Snow) coexisting with blocky voxels (Grass, Dirt, Stone, Sand).
Key features:
- SmoothMesher with binary SDF, centroid vertex placement, per-axis boundary
clamping to align with blocky grid at smooth↔blocky transitions
- Cross-chunk connectivity: PAD=2 SDF grid, vertex range [-1, CHUNK_SIZE),
canonical edge ownership (no duplicate triangles, no z-fighting)
- Face normals oriented by edge axis+sign (robust with binary SDF, unlike
SDF gradient dot or centroid sampling approaches)
- Y-axis winding fix: sharing cells have different spatial arrangement,
requiring opposite winding from X and Z axes
- GPU mesher treats smooth neighbors as solid (no blocky faces toward smooth)
- Material blending: primary (smooth-only) + secondary (all counts) per vertex
- Dedicated shaders: voxelSmoothVS (vertex pulling t6) + voxelSmoothPS
(triplanar + lerp blending between two materials)
- Separate render pass with LoadOp::LOAD after voxels+topings
- New materials: SmoothStone (mat 6), blocky Stone (mat 3) and Dirt patches
added to world generation for boundary testing
2026-03-27 13:03:55 +01:00
|
|
|
// Phase 5: CPU Surface Nets mesh for smooth voxels, upload to GPU
|
|
|
|
|
if (renderer.isInitialized()) {
|
|
|
|
|
uint32_t totalSmooth = 0;
|
|
|
|
|
uint32_t smoothChunks = 0;
|
|
|
|
|
world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) {
|
|
|
|
|
uint32_t count = SmoothMesher::meshChunk(chunk, world);
|
|
|
|
|
if (count > 0) {
|
|
|
|
|
totalSmooth += count;
|
|
|
|
|
smoothChunks++;
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
renderer.uploadSmoothData(world);
|
|
|
|
|
char msg[256];
|
|
|
|
|
snprintf(msg, sizeof(msg),
|
|
|
|
|
"SmoothMesher: %u vertices (%u tris) in %u chunks",
|
|
|
|
|
totalSmooth, totalSmooth / 3, smoothChunks);
|
|
|
|
|
wi::backlog::post(msg);
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-25 14:24:05 +01:00
|
|
|
worldGenerated_ = true;
|
|
|
|
|
|
|
|
|
|
setAO(AO_DISABLED);
|
|
|
|
|
setFXAAEnabled(true);
|
|
|
|
|
setBloomEnabled(false);
|
|
|
|
|
|
|
|
|
|
createRenderTargets();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void VoxelRenderPath::createRenderTargets() {
|
|
|
|
|
auto* device = wi::graphics::GetDevice();
|
|
|
|
|
if (!device) return;
|
|
|
|
|
|
|
|
|
|
uint32_t w = GetPhysicalWidth();
|
|
|
|
|
uint32_t h = GetPhysicalHeight();
|
|
|
|
|
if (w == 0 || h == 0) { w = 1920; h = 1080; }
|
|
|
|
|
|
|
|
|
|
wi::graphics::TextureDesc rtDesc;
|
|
|
|
|
rtDesc.type = wi::graphics::TextureDesc::Type::TEXTURE_2D;
|
|
|
|
|
rtDesc.width = w;
|
|
|
|
|
rtDesc.height = h;
|
|
|
|
|
rtDesc.format = wi::graphics::Format::R8G8B8A8_UNORM;
|
|
|
|
|
rtDesc.bind_flags = wi::graphics::BindFlag::RENDER_TARGET | wi::graphics::BindFlag::SHADER_RESOURCE;
|
|
|
|
|
rtDesc.mip_levels = 1;
|
|
|
|
|
rtDesc.sample_count = 1;
|
|
|
|
|
rtDesc.layout = wi::graphics::ResourceState::SHADER_RESOURCE;
|
|
|
|
|
device->CreateTexture(&rtDesc, nullptr, &voxelRT_);
|
|
|
|
|
|
|
|
|
|
wi::graphics::TextureDesc depthDesc;
|
|
|
|
|
depthDesc.type = wi::graphics::TextureDesc::Type::TEXTURE_2D;
|
|
|
|
|
depthDesc.width = w;
|
|
|
|
|
depthDesc.height = h;
|
|
|
|
|
depthDesc.format = wi::graphics::Format::D32_FLOAT;
|
|
|
|
|
depthDesc.bind_flags = wi::graphics::BindFlag::DEPTH_STENCIL | wi::graphics::BindFlag::SHADER_RESOURCE;
|
|
|
|
|
depthDesc.mip_levels = 1;
|
|
|
|
|
depthDesc.sample_count = 1;
|
|
|
|
|
depthDesc.layout = wi::graphics::ResourceState::DEPTHSTENCIL;
|
|
|
|
|
device->CreateTexture(&depthDesc, nullptr, &voxelDepth_);
|
|
|
|
|
|
|
|
|
|
rtCreated_ = voxelRT_.IsValid() && voxelDepth_.IsValid();
|
|
|
|
|
wi::backlog::post("VoxelRenderPath: render targets " + std::string(rtCreated_ ? "OK" : "FAILED")
|
|
|
|
|
+ " (" + std::to_string(w) + "x" + std::to_string(h) + ")");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ── WASD camera input ───────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
// Maps an uppercase letter to its button code, computed as an offset from
// wi::input::CHARACTER_RANGE_START ('A' maps to the start of the range).
static constexpr wi::input::BUTTON letterButton(char upperLetter) {
    return static_cast<wi::input::BUTTON>(wi::input::CHARACTER_RANGE_START + (upperLetter - 'A'));
}

static constexpr wi::input::BUTTON KEY_W = letterButton('W');
static constexpr wi::input::BUTTON KEY_A = letterButton('A');
static constexpr wi::input::BUTTON KEY_S = letterButton('S');
static constexpr wi::input::BUTTON KEY_D = letterButton('D');
|
|
|
|
|
|
|
|
|
|
void VoxelRenderPath::handleInput(float dt) {
|
2026-03-26 12:14:08 +01:00
|
|
|
// F2: toggle backlog console
|
|
|
|
|
if (wi::input::Press(wi::input::KEYBOARD_BUTTON_F2)) {
|
|
|
|
|
wi::backlog::Toggle();
|
|
|
|
|
}
|
|
|
|
|
// F3: toggle animated terrain
|
|
|
|
|
if (wi::input::Press(wi::input::KEYBOARD_BUTTON_F3)) {
|
|
|
|
|
animatedTerrain_ = !animatedTerrain_;
|
|
|
|
|
wi::backlog::post(animatedTerrain_ ? "Animation: ON (60 Hz)" : "Animation: OFF");
|
|
|
|
|
}
|
|
|
|
|
// F4: toggle blend debug visualization
|
|
|
|
|
if (wi::input::Press(wi::input::KEYBOARD_BUTTON_F4)) {
|
|
|
|
|
renderer.debugBlend_ = !renderer.debugBlend_;
|
|
|
|
|
wi::backlog::post(renderer.debugBlend_ ? "Blend debug: ON" : "Blend debug: OFF");
|
|
|
|
|
}
|
2026-03-25 14:24:05 +01:00
|
|
|
if (wi::input::Press(wi::input::MOUSE_BUTTON_RIGHT)) {
|
|
|
|
|
mouseCaptured = !mouseCaptured;
|
|
|
|
|
wi::input::HidePointer(mouseCaptured);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (mouseCaptured) {
|
|
|
|
|
auto mouseState = wi::input::GetMouseState();
|
|
|
|
|
cameraYaw += mouseState.delta_position.x * cameraSensitivity;
|
|
|
|
|
cameraPitch += mouseState.delta_position.y * cameraSensitivity;
|
|
|
|
|
cameraPitch = std::clamp(cameraPitch, -1.5f, 1.5f);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
float cosPitch = std::cos(cameraPitch);
|
|
|
|
|
XMFLOAT3 forward(
|
|
|
|
|
std::sin(cameraYaw) * cosPitch,
|
|
|
|
|
-std::sin(cameraPitch),
|
|
|
|
|
std::cos(cameraYaw) * cosPitch
|
|
|
|
|
);
|
|
|
|
|
XMFLOAT3 right(std::cos(cameraYaw), 0.0f, -std::sin(cameraYaw));
|
|
|
|
|
|
|
|
|
|
float speed = cameraSpeed * dt;
|
|
|
|
|
if (wi::input::Down(wi::input::KEYBOARD_BUTTON_LSHIFT)) speed *= 3.0f;
|
|
|
|
|
|
|
|
|
|
if (wi::input::Down(KEY_W)) { cameraPos.x += forward.x * speed; cameraPos.y += forward.y * speed; cameraPos.z += forward.z * speed; }
|
|
|
|
|
if (wi::input::Down(KEY_S)) { cameraPos.x -= forward.x * speed; cameraPos.y -= forward.y * speed; cameraPos.z -= forward.z * speed; }
|
|
|
|
|
if (wi::input::Down(KEY_A)) { cameraPos.x -= right.x * speed; cameraPos.z -= right.z * speed; }
|
|
|
|
|
if (wi::input::Down(KEY_D)) { cameraPos.x += right.x * speed; cameraPos.z += right.z * speed; }
|
|
|
|
|
if (wi::input::Down(wi::input::KEYBOARD_BUTTON_SPACE)) cameraPos.y += speed;
|
|
|
|
|
if (wi::input::Down(wi::input::KEYBOARD_BUTTON_LCONTROL)) cameraPos.y -= speed;
|
|
|
|
|
|
|
|
|
|
camera->Eye = cameraPos;
|
|
|
|
|
camera->At = forward;
|
|
|
|
|
camera->Up = XMFLOAT3(0, 1, 0);
|
|
|
|
|
camera->UpdateCamera();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void VoxelRenderPath::Update(float dt) {
|
2026-03-26 09:05:52 +01:00
|
|
|
auto frameStart = std::chrono::high_resolution_clock::now();
|
2026-03-25 14:24:05 +01:00
|
|
|
lastDt_ = dt;
|
|
|
|
|
float instantFps = (dt > 0.0f) ? (1.0f / dt) : 0.0f;
|
|
|
|
|
smoothFps_ = smoothFps_ * 0.95f + instantFps * 0.05f;
|
|
|
|
|
if (camera) handleInput(dt);
|
2026-03-26 18:58:19 +01:00
|
|
|
windTime_ += dt;
|
|
|
|
|
renderer.windTime_ = windTime_;
|
2026-03-26 09:05:52 +01:00
|
|
|
|
|
|
|
|
// Animated terrain: regenerate at 60 Hz with time-shifted noise
|
|
|
|
|
// Fused: regenerate + pack voxel data in the same parallel pass
|
|
|
|
|
if (animatedTerrain_ && renderer.isInitialized()) {
|
|
|
|
|
animAccum_ += dt;
|
|
|
|
|
if (animAccum_ >= ANIM_INTERVAL) {
|
|
|
|
|
animAccum_ -= ANIM_INTERVAL;
|
|
|
|
|
animTime_ += ANIM_INTERVAL;
|
|
|
|
|
|
|
|
|
|
// Prepare pack cache for fused regenerate+pack
|
|
|
|
|
const uint32_t wordsPerChunk = CHUNK_VOLUME / 2;
|
|
|
|
|
uint32_t totalWords = (uint32_t)world.chunkCount() * wordsPerChunk;
|
|
|
|
|
renderer.packedVoxelCache_.resize(totalWords);
|
|
|
|
|
|
|
|
|
|
auto t0 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
world.regenerateAnimated(animTime_,
|
|
|
|
|
renderer.packedVoxelCache_.data(), totalWords);
|
|
|
|
|
auto t1 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
profRegenerate_.add(std::chrono::duration<float, std::milli>(t1 - t0).count());
|
|
|
|
|
|
|
|
|
|
renderer.voxelCacheDirty_ = false; // cache already filled by fused pack
|
|
|
|
|
renderer.gpuMeshDirty_ = true; // GPU still needs upload + dispatch
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (renderer.isInitialized()) {
|
|
|
|
|
auto t0 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
renderer.updateMeshes(world);
|
|
|
|
|
auto t1 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
profUpdateMeshes_.add(std::chrono::duration<float, std::milli>(t1 - t0).count());
|
|
|
|
|
}
|
2026-03-25 14:24:05 +01:00
|
|
|
RenderPath3D::Update(dt);
|
2026-03-26 09:05:52 +01:00
|
|
|
|
|
|
|
|
// Profiling: accumulate frame time (will be completed in Compose)
|
|
|
|
|
auto frameEnd = std::chrono::high_resolution_clock::now();
|
|
|
|
|
profFrame_.add(std::chrono::duration<float, std::milli>(frameEnd - frameStart).count());
|
|
|
|
|
|
|
|
|
|
// Log averages every 5 seconds
|
|
|
|
|
profTimer_ += dt;
|
|
|
|
|
if (profTimer_ >= PROF_INTERVAL) {
|
|
|
|
|
logProfilingAverages();
|
|
|
|
|
profTimer_ -= PROF_INTERVAL;
|
|
|
|
|
}
|
2026-03-25 14:24:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void VoxelRenderPath::Render() const {
|
|
|
|
|
RenderPath3D::Render();
|
|
|
|
|
|
|
|
|
|
if (renderer.isInitialized() && camera && rtCreated_) {
|
|
|
|
|
auto* device = wi::graphics::GetDevice();
|
|
|
|
|
CommandList cmd = device->BeginCommandList();
|
2026-03-25 22:51:22 +01:00
|
|
|
|
2026-03-26 09:05:52 +01:00
|
|
|
// GPU mesh path: only re-dispatch when voxel data changed
|
|
|
|
|
if (renderer.gpuMeshEnabled_ && renderer.gpuMesherAvailable_) {
|
|
|
|
|
// Always readback previous frame's quad count
|
|
|
|
|
uint32_t* countData = (uint32_t*)renderer.meshCounterReadback_.mapped_data;
|
|
|
|
|
if (countData) {
|
|
|
|
|
renderer.gpuMeshQuadCount_ = *countData;
|
|
|
|
|
renderer.totalQuads_ = renderer.gpuMeshQuadCount_;
|
|
|
|
|
}
|
|
|
|
|
// Only re-dispatch compute mesher when data changed
|
|
|
|
|
if (renderer.gpuMeshDirty_) {
|
|
|
|
|
renderer.dispatchGpuMesh(cmd, world,
|
|
|
|
|
&profVoxelPack_, &profGpuUpload_, &profGpuDispatch_);
|
|
|
|
|
}
|
2026-03-25 22:51:22 +01:00
|
|
|
}
|
|
|
|
|
|
2026-03-26 09:05:52 +01:00
|
|
|
// GPU mesh benchmark state machine (runs once after world gen, CPU path only)
|
|
|
|
|
if (!renderer.gpuMeshEnabled_) {
|
|
|
|
|
if (renderer.benchState_ == VoxelRenderer::BenchState::DISPATCH) {
|
|
|
|
|
renderer.dispatchGpuMeshBenchmark(cmd, world);
|
|
|
|
|
} else if (renderer.benchState_ == VoxelRenderer::BenchState::READBACK) {
|
|
|
|
|
renderer.readbackGpuMeshBenchmark();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto tRender0 = std::chrono::high_resolution_clock::now();
|
2026-03-25 14:24:05 +01:00
|
|
|
renderer.render(cmd, *camera, voxelDepth_, voxelRT_);
|
2026-03-26 17:47:08 +01:00
|
|
|
|
|
|
|
|
// Phase 4: render topings (separate render pass, preserves voxel output)
|
|
|
|
|
renderer.renderTopings(cmd, topingSystem, voxelDepth_, voxelRT_);
|
Phase 5.1: Naive Surface Nets smooth rendering
Implement CPU-side Naive Surface Nets for smooth voxel surfaces (SmoothStone,
Snow) coexisting with blocky voxels (Grass, Dirt, Stone, Sand).
Key features:
- SmoothMesher with binary SDF, centroid vertex placement, per-axis boundary
clamping to align with blocky grid at smooth↔blocky transitions
- Cross-chunk connectivity: PAD=2 SDF grid, vertex range [-1, CHUNK_SIZE),
canonical edge ownership (no duplicate triangles, no z-fighting)
- Face normals oriented by edge axis+sign (robust with binary SDF, unlike
SDF gradient dot or centroid sampling approaches)
- Y-axis winding fix: sharing cells have different spatial arrangement,
requiring opposite winding from X and Z axes
- GPU mesher treats smooth neighbors as solid (no blocky faces toward smooth)
- Material blending: primary (smooth-only) + secondary (all counts) per vertex
- Dedicated shaders: voxelSmoothVS (vertex pulling t6) + voxelSmoothPS
(triplanar + lerp blending between two materials)
- Separate render pass with LoadOp::LOAD after voxels+topings
- New materials: SmoothStone (mat 6), blocky Stone (mat 3) and Dirt patches
added to world generation for boundary testing
2026-03-27 13:03:55 +01:00
|
|
|
|
|
|
|
|
// Phase 5: render smooth surfaces (separate render pass, preserves all prior output)
|
|
|
|
|
renderer.renderSmooth(cmd, voxelDepth_, voxelRT_);
|
2026-03-26 09:05:52 +01:00
|
|
|
auto tRender1 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
profRender_.add(std::chrono::duration<float, std::milli>(tRender1 - tRender0).count());
|
2026-03-25 14:24:05 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-26 09:05:52 +01:00
|
|
|
void VoxelRenderPath::logProfilingAverages() const {
|
|
|
|
|
char msg[512];
|
|
|
|
|
snprintf(msg, sizeof(msg),
|
|
|
|
|
"=== PERF PROFILE (avg over %.0fs) ===\n"
|
|
|
|
|
" Regenerate: %7.2f ms (%u calls)\n"
|
|
|
|
|
" UpdateMeshes: %7.2f ms (%u calls)\n"
|
|
|
|
|
" VoxelPack: %7.2f ms (%u calls)\n"
|
|
|
|
|
" GPU Upload: %7.2f ms (%u calls)\n"
|
|
|
|
|
" GPU Dispatch: %7.2f ms (%u calls)\n"
|
|
|
|
|
" Render: %7.2f ms (%u calls)\n"
|
|
|
|
|
" Frame (Upd): %7.2f ms (%u calls, %.1f FPS)",
|
|
|
|
|
PROF_INTERVAL,
|
|
|
|
|
profRegenerate_.avg(), profRegenerate_.count,
|
|
|
|
|
profUpdateMeshes_.avg(), profUpdateMeshes_.count,
|
|
|
|
|
profVoxelPack_.avg(), profVoxelPack_.count,
|
|
|
|
|
profGpuUpload_.avg(), profGpuUpload_.count,
|
|
|
|
|
profGpuDispatch_.avg(), profGpuDispatch_.count,
|
|
|
|
|
profRender_.avg(), profRender_.count,
|
|
|
|
|
profFrame_.avg(), profFrame_.count,
|
|
|
|
|
profFrame_.count > 0 ? (1000.0f / profFrame_.avg()) : 0.0f);
|
|
|
|
|
wi::backlog::post(msg);
|
|
|
|
|
|
|
|
|
|
profRegenerate_.reset();
|
|
|
|
|
profUpdateMeshes_.reset();
|
|
|
|
|
profVoxelPack_.reset();
|
|
|
|
|
profGpuUpload_.reset();
|
|
|
|
|
profGpuDispatch_.reset();
|
|
|
|
|
profRender_.reset();
|
|
|
|
|
profFrame_.reset();
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-25 14:24:05 +01:00
|
|
|
// Composes the final image for this frame:
//   1. runs the base RenderPath3D composition,
//   2. draws the voxel render target full-screen (opaque) when it is available,
//   3. draws a multi-line text HUD with frame timing, chunk/quad/draw-call
//      counters, the active render mode and the key bindings.
void VoxelRenderPath::Compose(CommandList cmd) const {
    // Incremented from a const method — presumably a mutable member; confirm in the header.
    frameCount_++;

    RenderPath3D::Compose(cmd);

    if (rtCreated_ && voxelRT_.IsValid()) {
        wi::image::Params blit;
        blit.enableFullScreen();
        blit.blendFlag = wi::enums::BLENDMODE_OPAQUE;
        wi::image::Draw(&voxelRT_, blit, cmd);
    }

    // ── HUD overlay ─────────────────────────────────────────────
    wi::font::Params fontParams;
    fontParams.posX = 10;
    fontParams.posY = 10;
    fontParams.size = 20;
    fontParams.color = wi::Color(255, 255, 255, 230);
    fontParams.shadowColor = wi::Color(0, 0, 0, 180);

    // Fixed-precision number formatting goes through snprintf; counters use std::to_string.
    char fpsText[16];
    char dtText[16];
    snprintf(fpsText, sizeof(fpsText), "%.1f", smoothFps_);
    snprintf(dtText, sizeof(dtText), "%.2f", lastDt_ * 1000.0f);

    std::string hud = "BVLE Voxel Engine (Phase 5 — Smooth Surfaces)\n";

    hud += "FPS: ";
    hud += fpsText;
    hud += " (";
    hud += dtText;
    hud += " ms)\n";

    if (debugMode) {
        hud += "=== DEBUG FACE MODE ===\n";
        hud += "+X=Red -X=DkRed +Y=Green -Y=DkGreen +Z=Blue -Z=DkBlue\n";
    }

    hud += "Chunks: ";
    hud += std::to_string(renderer.getVisibleChunks());
    hud += "/";
    hud += std::to_string(renderer.getChunkCount());
    hud += "\n";

    hud += "Quads: ";
    hud += std::to_string(renderer.getTotalQuads());
    hud += "\n";

    // The first matching mode wins; later queries are skipped once one matches.
    const bool gpuMesh = renderer.isGpuMeshEnabled();
    const char* modeLabel =
        gpuMesh                  ? "GPU mesh (1x1) + DrawInstanced"
        : renderer.isGpuCulling() ? "CPU greedy + MDI + GPU cull"
        : renderer.isMdiEnabled() ? "CPU greedy + MDI + CPU cull"
                                  : "CPU greedy + DrawInstanced + CPU cull";

    hud += "Draw Calls: ";
    hud += std::to_string(renderer.getDrawCalls());
    hud += " (";
    hud += modeLabel;
    hud += ")\n";

    if (gpuMesh) {
        hud += "GPU Mesh Quads: ";
        hud += std::to_string(renderer.getGpuMeshQuadCount());
        hud += "\n";
    } else {
        // GPU cull/draw timings are only reported when the GPU mesher is off.
        char cullText[16];
        char drawText[16];
        snprintf(cullText, sizeof(cullText), "%.3f", renderer.getGpuCullTimeMs());
        snprintf(drawText, sizeof(drawText), "%.3f", renderer.getGpuDrawTimeMs());
        hud += "GPU Cull: ";
        hud += cullText;
        hud += " ms | Draw: ";
        hud += drawText;
        hud += " ms\n";
    }

    hud += "Topings: ";
    hud += std::to_string(topingSystem.getInstanceCount());
    hud += " instances, ";
    hud += std::to_string(renderer.getTopingDrawCalls());
    hud += " draws (";
    hud += std::to_string(topingSystem.getDefCount());
    hud += " types)\n";

    // Smooth-surface line appears only when there are smooth vertices;
    // the triangle figure is the vertex count divided by three.
    const auto smoothVerts = renderer.getSmoothVertexCount();
    if (smoothVerts > 0) {
        hud += "Smooth: ";
        hud += std::to_string(smoothVerts);
        hud += " verts (";
        hud += std::to_string(smoothVerts / 3);
        hud += " tris), ";
        hud += std::to_string(renderer.getSmoothDrawCalls());
        hud += " draws\n";
    }

    hud += "WASD+Space/Ctrl: move | Shift: fast | Right-click: capture mouse\n";
    hud += "F2: console | F3: anim [";
    hud += (animatedTerrain_ ? "ON" : "OFF");
    hud += "] | F4: dbg [";
    hud += (renderer.debugBlend_ ? "ON" : "OFF");
    hud += "]";

    wi::font::Draw(hud, fontParams, cmd);
}
|
|
|
|
|
|
|
|
|
|
} // namespace voxel
|