#pragma once #include "VoxelWorld.h" #include "VoxelMesher.h" #include "TopingSystem.h" #include "WickedEngine.h" namespace voxel { // ── CPU Profiling accumulator ──────────────────────────────────── struct ProfileAccum { double totalMs = 0.0; uint32_t count = 0; void add(float ms) { totalMs += ms; count++; } float avg() const { return count > 0 ? (float)(totalMs / count) : 0.0f; } void reset() { totalMs = 0.0; count = 0; } }; // ── GPU-visible chunk info (must match HLSL GPUChunkInfo) ──────── struct GPUChunkInfo { XMFLOAT4 worldPos; // xyz = chunk origin, w = debug flag uint32_t quadOffset; // offset into mega quad buffer uint32_t quadCount; // number of quads for this chunk uint32_t pad[2]; // align to 32 bytes uint32_t faceOffsets[6]; // per-face quad offset within this chunk's quads uint32_t faceCounts[6]; // per-face quad count }; // ── Voxel Renderer (Phase 2: mega-buffer + MDI pipeline) ──────── class VoxelRenderer { friend class VoxelRenderPath; public: VoxelRenderer(); ~VoxelRenderer(); void initialize(wi::graphics::GraphicsDevice* device); void shutdown(); // Mesh dirty chunks and repack the mega-buffer void updateMeshes(VoxelWorld& world); // Render all visible chunks void render( wi::graphics::CommandList cmd, const wi::scene::CameraComponent& camera, const wi::graphics::Texture& depthBuffer, const wi::graphics::Texture& renderTarget ) const; // Generate procedural textures for materials void generateTextures(); // Stats uint32_t getTotalQuads() const { return totalQuads_; } uint32_t getVisibleChunks() const { return visibleChunks_; } uint32_t getDrawCalls() const { return drawCalls_; } uint32_t getChunkCount() const { return chunkCount_; } bool isInitialized() const { return initialized_; } bool isGpuCulling() const { return gpuCullingEnabled_; } bool isMdiEnabled() const { return mdiEnabled_; } bool debugFaceColors_ = false; bool debugBlend_ = false; float windTime_ = 0.0f; // set by VoxelRenderPath::Update each frame private: void createPipeline(); void rebuildMegaBuffer(VoxelWorld& world); wi::graphics::GraphicsDevice* device_ = nullptr; // Shaders & Pipeline (voxels) wi::graphics::Shader vertexShader_; wi::graphics::Shader pixelShader_; wi::graphics::PipelineState pso_; wi::graphics::Shader cullShader_; // Frustum cull compute shader // Shaders & Pipeline (topings, Phase 4) wi::graphics::Shader topingVS_; wi::graphics::Shader topingPS_; wi::graphics::PipelineState topingPso_; wi::graphics::GPUBuffer topingVertexBuffer_; // StructuredBuffer, SRV t4 wi::graphics::GPUBuffer topingInstanceBuffer_; // StructuredBuffer, SRV t5 static constexpr uint32_t MAX_TOPING_INSTANCES = 256 * 1024; // 256K instances max mutable uint32_t topingDrawCalls_ = 0; // Shaders & Pipeline (smooth surfaces, Phase 5) wi::graphics::Shader smoothVS_; wi::graphics::Shader smoothPS_; wi::graphics::PipelineState smoothPso_; wi::graphics::GPUBuffer smoothVertexBuffer_; // StructuredBuffer, SRV t6 static constexpr uint32_t MAX_SMOOTH_VERTICES = 4 * 1024 * 1024; // 4M vertices max mutable uint32_t smoothVertexCount_ = 0; mutable uint32_t smoothDrawCalls_ = 0; bool smoothDirty_ = true; // Texture array for materials (256x256, 5 layers for prototype) wi::graphics::Texture textureArray_; wi::graphics::Sampler sampler_; // ── Mega-buffer architecture (Phase 2) ────────────────────── static constexpr uint32_t MEGA_BUFFER_CAPACITY = 2 * 1024 * 1024; // 2M quads max (16 MB) static constexpr uint32_t MAX_CHUNKS = 2048; static constexpr uint32_t MAX_DRAWS = MAX_CHUNKS * 6; // up to 6 face groups per chunk wi::graphics::GPUBuffer megaQuadBuffer_; // StructuredBuffer, SRV t0 wi::graphics::GPUBuffer chunkInfoBuffer_; // StructuredBuffer, SRV t2 // CPU-side tracking struct ChunkSlot { ChunkPos pos; uint32_t quadOffset; // offset into mega-buffer (in quads) uint32_t quadCount; }; std::vector chunkSlots_; std::vector cpuChunkInfo_; std::vector cpuMegaQuads_; // CPU staging for mega-buffer uint32_t chunkCount_ = 0; bool megaBufferDirty_ = true; // ── Indirect draw (Phase 2 MDI) ───────────────────────────── // Wicked Engine's DrawInstancedIndirectCount command signature includes a // push constant (1 × uint32 at b999) BEFORE each D3D12_DRAW_ARGUMENTS. // Total stride = 4 + 16 = 20 bytes per draw entry. struct IndirectDrawArgs { uint32_t pushConstant; // written to b999[0] by ExecuteIndirect uint32_t vertexCountPerInstance; uint32_t instanceCount; uint32_t startVertexLocation; uint32_t startInstanceLocation; }; wi::graphics::GPUBuffer indirectArgsBuffer_; // IndirectDrawArgs[MAX_DRAWS] wi::graphics::GPUBuffer drawCountBuffer_; // uint32_t[1] mutable std::vector cpuIndirectArgs_; bool gpuCullingEnabled_ = true; // Phase 2.3: GPU compute cull (true) vs CPU fallback (false) bool mdiEnabled_ = true; // Phase 2.2: MDI rendering with CPU-filled indirect args // Constants buffer (must match HLSL VoxelCB) struct VoxelConstants { XMFLOAT4X4 viewProjection; XMFLOAT4 cameraPosition; XMFLOAT4 sunDirection; XMFLOAT4 sunColor; float chunkSize; float textureTiling; float blendEnabled; float debugBlend; XMFLOAT4 frustumPlanes[6]; // ax+by+cz+d=0 uint32_t chunkCount; uint32_t bleedMask; // bit N set = material N can bleed onto neighbors uint32_t resistBleedMask; // bit N set = material N resists bleed from neighbors float windTime; }; wi::graphics::GPUBuffer constantBuffer_; // ── GPU Compute Mesher ────────────────────────────────────────── wi::graphics::Shader meshShader_; // voxelMeshCS compute shader mutable wi::graphics::GPUBuffer voxelDataBuffer_; // chunk voxel data (StructuredBuffer) wi::graphics::GPUBuffer gpuQuadBuffer_; // GPU mesh output (RWStructuredBuffer) wi::graphics::GPUBuffer gpuQuadCounter_; // atomic counter for GPU mesh output wi::graphics::GPUBuffer meshCounterReadback_; // READBACK buffer for quad counter bool gpuMesherAvailable_ = false; bool gpuMeshEnabled_ = true; // Use GPU meshing instead of CPU greedy mutable uint32_t gpuMeshQuadCount_ = 0; // Readback from previous frame (1-frame delay) mutable uint32_t voxelDataCapacity_ = 0; // Current capacity of voxelDataBuffer_ (in uint32s) mutable std::vector packedVoxelCache_; // cached packed voxel data for all chunks mutable bool voxelCacheDirty_ = true; // true: packedVoxelCache_ needs repack from chunks mutable bool gpuMeshDirty_ = true; // true: GPU needs upload + re-dispatch mutable bool chunkInfoDirty_ = true; // true: chunkInfoBuffer needs re-upload // Benchmark state machine: runs once after world gen enum class BenchState { IDLE, DISPATCH, READBACK, DONE }; mutable BenchState benchState_ = BenchState::IDLE; mutable float cpuMeshTimeMs_ = 0.0f; mutable uint32_t gpuBaselineQuads_ = 0; void dispatchGpuMeshBenchmark(wi::graphics::CommandList cmd, const VoxelWorld& world) const; void readbackGpuMeshBenchmark() const; void dispatchGpuMesh(wi::graphics::CommandList cmd, const VoxelWorld& world, ProfileAccum* profPack = nullptr, ProfileAccum* profUpload = nullptr, ProfileAccum* profDispatch = nullptr) const; void rebuildChunkInfoOnly(VoxelWorld& world); // ── GPU Timestamp Queries (Phase 2 benchmark) ──────────────── wi::graphics::GPUQueryHeap timestampHeap_; wi::graphics::GPUBuffer timestampReadback_; static constexpr uint32_t TS_CULL_BEGIN = 0; static constexpr uint32_t TS_CULL_END = 1; static constexpr uint32_t TS_DRAW_BEGIN = 2; static constexpr uint32_t TS_DRAW_END = 3; static constexpr uint32_t TS_MESH_BEGIN = 4; static constexpr uint32_t TS_MESH_END = 5; static constexpr uint32_t TS_COUNT = 6; mutable float gpuCullTimeMs_ = 0.0f; mutable float gpuDrawTimeMs_ = 0.0f; mutable float gpuMeshTimeMs_ = 0.0f; // Stats (mutable: updated during const Render() call) mutable uint32_t totalQuads_ = 0; mutable uint32_t visibleChunks_ = 0; mutable uint32_t drawCalls_ = 0; bool initialized_ = false; public: float getGpuCullTimeMs() const { return gpuCullTimeMs_; } float getGpuDrawTimeMs() const { return gpuDrawTimeMs_; } bool isGpuMeshEnabled() const { return gpuMeshEnabled_ && gpuMesherAvailable_; } uint32_t getGpuMeshQuadCount() const { return gpuMeshQuadCount_; } // Phase 4: Toping rendering void uploadTopingData(const TopingSystem& topingSystem); void renderTopings( wi::graphics::CommandList cmd, const TopingSystem& topingSystem, const wi::graphics::Texture& depthBuffer, const wi::graphics::Texture& renderTarget ) const; uint32_t getTopingDrawCalls() const { return topingDrawCalls_; } // Phase 5: Smooth surface rendering void uploadSmoothData(VoxelWorld& world); void renderSmooth( wi::graphics::CommandList cmd, const wi::graphics::Texture& depthBuffer, const wi::graphics::Texture& renderTarget ) const; uint32_t getSmoothVertexCount() const { return smoothVertexCount_; } uint32_t getSmoothDrawCalls() const { return smoothDrawCalls_; } }; // ── Custom RenderPath that integrates voxel rendering ─────────── class VoxelRenderPath : public wi::RenderPath3D { public: VoxelWorld world; VoxelRenderer renderer; TopingSystem topingSystem; bool debugMode = false; float cameraSpeed = 50.0f; float cameraSensitivity = 0.003f; XMFLOAT3 cameraPos = { 256.0f, 100.0f, 256.0f }; float cameraPitch = -0.3f; float cameraYaw = 0.0f; bool mouseCaptured = false; void Start() override; void Update(float dt) override; void Render() const override; void Compose(wi::graphics::CommandList cmd) const override; private: void handleInput(float dt); void createRenderTargets(); mutable bool worldGenerated_ = false; mutable int frameCount_ = 0; mutable float lastDt_ = 0.016f; mutable float smoothFps_ = 60.0f; // Wind animation (continuous, always running) float windTime_ = 0.0f; // Animated terrain (wave effect at 60 Hz, toggled with F3) bool animatedTerrain_ = false; float animTime_ = 0.0f; float animAccum_ = 0.0f; static constexpr float ANIM_INTERVAL = 1.0f / 60.0f; // ~16.7ms = 60 Hz wi::graphics::Texture voxelRT_; wi::graphics::Texture voxelDepth_; mutable bool rtCreated_ = false; // ── CPU Profiling (averages every 5 seconds) ───────────────── mutable ProfileAccum profRegenerate_; // regenerateAnimated mutable ProfileAccum profUpdateMeshes_; // updateMeshes (rebuildChunkInfoOnly or CPU mesh) mutable ProfileAccum profVoxelPack_; // voxel data packing in dispatchGpuMesh mutable ProfileAccum profGpuUpload_; // GPU upload in dispatchGpuMesh mutable ProfileAccum profGpuDispatch_; // compute dispatches in dispatchGpuMesh mutable ProfileAccum profRender_; // render() total mutable ProfileAccum profFrame_; // full frame (Update + Render + Compose) mutable float profTimer_ = 0.0f; static constexpr float PROF_INTERVAL = 5.0f; void logProfilingAverages() const; }; } // namespace voxel