bvle-voxels/src/voxel/VoxelRenderer.h
Samuel Bouchet 9e777d653b Phase 4.1: TopingSystem infrastructure + procedural mesh generation
- TopingSystem with TopingDef registry, procedural mesh gen, instance collection
- 2 toping types: stone bevel (h=0.06, smooth) + grass edge (h=0.12, bumpy)
- 16 mesh variants per type indexed by 4-bit adjacency bitmask (~6 unique with symmetry)
- Wedge cross-section: outer wall + sloped top, grass has sinusoidal height profile
- Instance collection scans exposed +Y faces, checks same-material neighbors
- Cross-chunk adjacency via VoxelWorld::getVoxel()
- Integrated into VoxelRenderPath: init at Start(), stats in HUD
- ~191K instances, 1920 mesh vertices for 170 chunks (validated)
- Research doc (research_connected_meshes.md) + plan (plan_phase4.md)
2026-03-26 15:27:15 +01:00

243 lines
10 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#include "VoxelWorld.h"
#include "VoxelMesher.h"
#include "TopingSystem.h"
#include "WickedEngine.h"
namespace voxel {
// ── CPU Profiling accumulator ────────────────────────────────────
// Accumulates timing samples (in milliseconds) so a mean can be reported
// over an arbitrary window. The sum is kept in double precision; samples
// and the reported mean are single precision.
struct ProfileAccum {
    double totalMs{0.0};   // sum of every sample added since the last reset()
    uint32_t count{0};     // number of samples added since the last reset()

    // Record one sample.
    void add(float ms) {
        totalMs += static_cast<double>(ms);
        ++count;
    }

    // Mean of the recorded samples; 0 when nothing has been recorded,
    // which avoids a division by zero.
    float avg() const {
        if (count == 0) {
            return 0.0f;
        }
        return static_cast<float>(totalMs / static_cast<double>(count));
    }

    // Discard all recorded samples.
    void reset() {
        count = 0;
        totalMs = 0.0;
    }
};
// ── GPU-visible chunk info (must match HLSL GPUChunkInfo) ────────
// Per-chunk record consumed by the GPU. The field order, types and explicit
// padding are part of the contract with the HLSL struct of the same name:
// do not reorder, resize or remove members without updating the shader side.
struct GPUChunkInfo {
XMFLOAT4 worldPos; // xyz = chunk origin, w = debug flag
uint32_t quadOffset; // offset into mega quad buffer (in quads)
uint32_t quadCount; // number of quads for this chunk
uint32_t pad[2]; // explicit padding: brings the fields above to 32 bytes
// The two arrays below have one entry per face group (6 per chunk).
// NOTE(review): the index-to-face-direction mapping is defined elsewhere — confirm against the mesher/shader.
uint32_t faceOffsets[6]; // per-face quad offset within this chunk's quads
uint32_t faceCounts[6]; // per-face quad count
};
// ── Voxel Renderer (Phase 2: mega-buffer + MDI pipeline) ────────
// Holds the GPU resources and CPU-side bookkeeping used to draw the voxel
// world: shaders and pipeline state, the material texture array, the mega quad
// buffer with its per-chunk info, indirect-draw arguments, the GPU compute
// mesher state, and GPU timestamp queries.
//
// render() and several helpers are const, so any state they need to update is
// declared mutable below. VoxelRenderPath is a friend and can access the
// private members directly.
class VoxelRenderer {
friend class VoxelRenderPath;
public:
VoxelRenderer();
~VoxelRenderer();
// Set up the renderer for the given graphics device.
// NOTE(review): presumably stores the pointer in device_ and creates the GPU
// resources declared below — confirm in the .cpp. The pointer is not owned here.
void initialize(wi::graphics::GraphicsDevice* device);
// Counterpart to initialize(). NOTE(review): exact teardown behavior is in the .cpp.
void shutdown();
// Mesh dirty chunks and repack the mega-buffer
void updateMeshes(VoxelWorld& world);
// Render all visible chunks.
// cmd          - command list the draw work is recorded on
// camera       - camera used for this view
// depthBuffer  - depth target for the voxel pass
// renderTarget - color target for the voxel pass
// Declared const; per-frame stats and caches it touches are mutable members.
void render(
wi::graphics::CommandList cmd,
const wi::scene::CameraComponent& camera,
const wi::graphics::Texture& depthBuffer,
const wi::graphics::Texture& renderTarget
) const;
// Generate procedural textures for materials
void generateTextures();
// Stats (plain accessors over the counters declared further down)
uint32_t getTotalQuads() const { return totalQuads_; }
uint32_t getVisibleChunks() const { return visibleChunks_; }
uint32_t getDrawCalls() const { return drawCalls_; }
uint32_t getChunkCount() const { return chunkCount_; }
bool isInitialized() const { return initialized_; }
bool isGpuCulling() const { return gpuCullingEnabled_; }
bool isMdiEnabled() const { return mdiEnabled_; }
// Debug toggles, publicly writable.
// NOTE(review): presumably forwarded to the shader (see VoxelConstants::debugBlend
// and GPUChunkInfo::worldPos.w) — confirm in the .cpp.
bool debugFaceColors_ = false;
bool debugBlend_ = false;
private:
void createPipeline();
void rebuildMegaBuffer(VoxelWorld& world);
// Non-owning pointer to the graphics device; null until one is provided.
wi::graphics::GraphicsDevice* device_ = nullptr;
// Shaders & Pipeline
wi::graphics::Shader vertexShader_;
wi::graphics::Shader pixelShader_;
wi::graphics::PipelineState pso_;
wi::graphics::Shader cullShader_; // Frustum cull compute shader
// Texture array for materials (256x256, 5 layers for prototype)
wi::graphics::Texture textureArray_;
wi::graphics::Sampler sampler_;
// ── Mega-buffer architecture (Phase 2) ──────────────────────
static constexpr uint32_t MEGA_BUFFER_CAPACITY = 2 * 1024 * 1024; // 2M quads max (16 MB)
static constexpr uint32_t MAX_CHUNKS = 2048;
static constexpr uint32_t MAX_DRAWS = MAX_CHUNKS * 6; // up to 6 face groups per chunk
wi::graphics::GPUBuffer megaQuadBuffer_; // StructuredBuffer<PackedQuad>, SRV t0
wi::graphics::GPUBuffer chunkInfoBuffer_; // StructuredBuffer<GPUChunkInfo>, SRV t2
// CPU-side tracking
// One entry per chunk: where that chunk's quads live inside the mega-buffer.
struct ChunkSlot {
ChunkPos pos;
uint32_t quadOffset; // offset into mega-buffer (in quads)
uint32_t quadCount;
};
std::vector<ChunkSlot> chunkSlots_;
std::vector<GPUChunkInfo> cpuChunkInfo_; // CPU copy of the per-chunk GPU records
std::vector<PackedQuad> cpuMegaQuads_; // CPU staging for mega-buffer
uint32_t chunkCount_ = 0;
bool megaBufferDirty_ = true; // starts dirty, so the first update performs a rebuild/upload
// ── Indirect draw (Phase 2 MDI) ─────────────────────────────
// Wicked Engine's DrawInstancedIndirectCount command signature includes a
// push constant (1 × uint32 at b999) BEFORE each D3D12_DRAW_ARGUMENTS.
// Total stride = 4 + 16 = 20 bytes per draw entry.
// The struct below is that 20-byte entry: five uint32 fields, push constant
// first. The field order must not change.
struct IndirectDrawArgs {
uint32_t pushConstant; // written to b999[0] by ExecuteIndirect
uint32_t vertexCountPerInstance;
uint32_t instanceCount;
uint32_t startVertexLocation;
uint32_t startInstanceLocation;
};
wi::graphics::GPUBuffer indirectArgsBuffer_; // IndirectDrawArgs[MAX_DRAWS]
wi::graphics::GPUBuffer drawCountBuffer_; // uint32_t[1]
mutable std::vector<IndirectDrawArgs> cpuIndirectArgs_; // mutable: filled from const code paths
bool gpuCullingEnabled_ = true; // Phase 2.3: GPU compute cull (true) vs CPU fallback (false)
bool mdiEnabled_ = true; // Phase 2.2: MDI rendering with CPU-filled indirect args
// Constants buffer (must match HLSL VoxelCB)
// Field order and types are part of the shader contract; the trailing
// _cullPad2 keeps the last four uint32 fields a full 16-byte group.
struct VoxelConstants {
XMFLOAT4X4 viewProjection;
XMFLOAT4 cameraPosition;
XMFLOAT4 sunDirection;
XMFLOAT4 sunColor;
float chunkSize;
float textureTiling;
float blendEnabled; // flag carried as a float
float debugBlend; // flag carried as a float — presumably mirrors debugBlend_; confirm in the .cpp
XMFLOAT4 frustumPlanes[6]; // ax+by+cz+d=0
uint32_t chunkCount;
uint32_t bleedMask; // bit N set = material N can bleed onto neighbors
uint32_t resistBleedMask; // bit N set = material N resists bleed from neighbors
uint32_t _cullPad2; // padding only
};
wi::graphics::GPUBuffer constantBuffer_;
// ── GPU Compute Mesher ──────────────────────────────────────────
wi::graphics::Shader meshShader_; // voxelMeshCS compute shader
mutable wi::graphics::GPUBuffer voxelDataBuffer_; // chunk voxel data (StructuredBuffer<uint>)
wi::graphics::GPUBuffer gpuQuadBuffer_; // GPU mesh output (RWStructuredBuffer<uint2>)
wi::graphics::GPUBuffer gpuQuadCounter_; // atomic counter for GPU mesh output
wi::graphics::GPUBuffer meshCounterReadback_; // READBACK buffer for quad counter
bool gpuMesherAvailable_ = false; // false until set elsewhere; gates isGpuMeshEnabled()
bool gpuMeshEnabled_ = true; // Use GPU meshing instead of CPU greedy
mutable uint32_t gpuMeshQuadCount_ = 0; // Readback from previous frame (1-frame delay)
mutable uint32_t voxelDataCapacity_ = 0; // Current capacity of voxelDataBuffer_ (in uint32s)
mutable std::vector<uint32_t> packedVoxelCache_; // cached packed voxel data for all chunks
mutable bool voxelCacheDirty_ = true; // true: packedVoxelCache_ needs repack from chunks
mutable bool gpuMeshDirty_ = true; // true: GPU needs upload + re-dispatch
mutable bool chunkInfoDirty_ = true; // true: chunkInfoBuffer needs re-upload
// Benchmark state machine: runs once after world gen
// NOTE(review): the transitions between these states are driven from the .cpp.
enum class BenchState { IDLE, DISPATCH, READBACK, DONE };
mutable BenchState benchState_ = BenchState::IDLE;
mutable float cpuMeshTimeMs_ = 0.0f;
mutable uint32_t gpuBaselineQuads_ = 0;
void dispatchGpuMeshBenchmark(wi::graphics::CommandList cmd, const VoxelWorld& world) const;
void readbackGpuMeshBenchmark() const;
// Run the GPU mesher on the given command list. The three optional
// accumulators receive CPU timings for the pack / upload / dispatch stages
// (see the matching prof* members of VoxelRenderPath); all default to nullptr.
// NOTE(review): presumably a null accumulator means that stage is not timed — confirm in the .cpp.
void dispatchGpuMesh(wi::graphics::CommandList cmd, const VoxelWorld& world,
ProfileAccum* profPack = nullptr, ProfileAccum* profUpload = nullptr,
ProfileAccum* profDispatch = nullptr) const;
void rebuildChunkInfoOnly(VoxelWorld& world);
// ── GPU Timestamp Queries (Phase 2 benchmark) ────────────────
wi::graphics::GPUQueryHeap timestampHeap_;
wi::graphics::GPUBuffer timestampReadback_;
// Begin/end slot pairs for the cull, draw and mesh passes; TS_COUNT is the
// number of slots. NOTE(review): presumably indices into timestampHeap_ — confirm in the .cpp.
static constexpr uint32_t TS_CULL_BEGIN = 0;
static constexpr uint32_t TS_CULL_END = 1;
static constexpr uint32_t TS_DRAW_BEGIN = 2;
static constexpr uint32_t TS_DRAW_END = 3;
static constexpr uint32_t TS_MESH_BEGIN = 4;
static constexpr uint32_t TS_MESH_END = 5;
static constexpr uint32_t TS_COUNT = 6;
mutable float gpuCullTimeMs_ = 0.0f;
mutable float gpuDrawTimeMs_ = 0.0f;
mutable float gpuMeshTimeMs_ = 0.0f; // no public getter below; reachable by VoxelRenderPath via friendship
// Stats (mutable: updated during the const render() call)
mutable uint32_t totalQuads_ = 0;
mutable uint32_t visibleChunks_ = 0;
mutable uint32_t drawCalls_ = 0;
bool initialized_ = false;
public:
// Accessors for the GPU timing and GPU-mesher state declared above.
float getGpuCullTimeMs() const { return gpuCullTimeMs_; }
float getGpuDrawTimeMs() const { return gpuDrawTimeMs_; }
// True only when GPU meshing is both requested and available.
bool isGpuMeshEnabled() const { return gpuMeshEnabled_ && gpuMesherAvailable_; }
uint32_t getGpuMeshQuadCount() const { return gpuMeshQuadCount_; }
};
// ── Custom RenderPath that integrates voxel rendering ───────────
// Render path derived from wi::RenderPath3D that owns the voxel world, its
// renderer and the toping system, plus free-camera settings, the
// animated-terrain toggle and CPU profiling accumulators.
class VoxelRenderPath : public wi::RenderPath3D {
public:
VoxelWorld world;
VoxelRenderer renderer;
TopingSystem topingSystem;
bool debugMode = false;
// Free-camera parameters and state.
// NOTE(review): units are not stated here — presumably world units per second
// for speed and radians for pitch/yaw; confirm against handleInput().
float cameraSpeed = 50.0f;
float cameraSensitivity = 0.003f;
XMFLOAT3 cameraPos = { 256.0f, 100.0f, 256.0f };
float cameraPitch = -0.3f;
float cameraYaw = 0.0f;
bool mouseCaptured = false;
// wi::RenderPath3D overrides. Render() and Compose() are const, so state
// they need to update is declared mutable below.
void Start() override;
void Update(float dt) override;
void Render() const override;
void Compose(wi::graphics::CommandList cmd) const override;
private:
void handleInput(float dt);
void createRenderTargets();
mutable bool worldGenerated_ = false;
mutable int frameCount_ = 0;
mutable float lastDt_ = 0.016f; // initial value ~ one frame at 60 fps
mutable float smoothFps_ = 60.0f;
// Animated terrain (wave effect at 60 Hz, toggled with F3)
bool animatedTerrain_ = false;
float animTime_ = 0.0f;
float animAccum_ = 0.0f; // NOTE(review): presumably accumulates dt until ANIM_INTERVAL elapses — confirm in Update()
static constexpr float ANIM_INTERVAL = 1.0f / 60.0f; // ~16.7ms = 60 Hz
// Render targets for the voxel pass (color + depth).
// NOTE(review): presumably created by createRenderTargets() with rtCreated_ recording that — confirm in the .cpp.
wi::graphics::Texture voxelRT_;
wi::graphics::Texture voxelDepth_;
mutable bool rtCreated_ = false;
// ── CPU Profiling (averages every 5 seconds) ─────────────────
mutable ProfileAccum profRegenerate_; // regenerateAnimated
mutable ProfileAccum profUpdateMeshes_; // updateMeshes (rebuildChunkInfoOnly or CPU mesh)
mutable ProfileAccum profVoxelPack_; // voxel data packing in dispatchGpuMesh
mutable ProfileAccum profGpuUpload_; // GPU upload in dispatchGpuMesh
mutable ProfileAccum profGpuDispatch_; // compute dispatches in dispatchGpuMesh
mutable ProfileAccum profRender_; // render() total
mutable ProfileAccum profFrame_; // full frame (Update + Render + Compose)
mutable float profTimer_ = 0.0f; // NOTE(review): presumably time elapsed since the last report — confirm in the .cpp
static constexpr float PROF_INTERVAL = 5.0f; // reporting period in seconds
void logProfilingAverages() const;
};
} // namespace voxel