#include "VoxelRenderer.h"
#include "wiPrimitive.h"

// Standard headers for the std:: names used in this file
// (std::clamp, std::sin/cos/sqrt, snprintf, std::string/to_string, std::vector).
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <string>
#include <vector>

using namespace wi::graphics;

namespace voxel {

// ── VoxelRenderer Implementation ────────────────────────────────

VoxelRenderer::VoxelRenderer() = default;

VoxelRenderer::~VoxelRenderer() {
    shutdown();
}

// Creates the pipeline, procedural textures and all GPU buffers used by the
// renderer. Leaves initialized_ == false when `dev` is null or when the
// graphics pipeline could not be created.
void VoxelRenderer::initialize(GraphicsDevice* dev) {
    device_ = dev;
    if (!device_) return;

    createPipeline();
    if (!pso_.IsValid()) {
        wi::backlog::post("VoxelRenderer: pipeline creation failed", wi::backlog::LogLevel::Error);
        initialized_ = false;
        return;
    }

    generateTextures();

    // Create mega quad buffer (SRV for vertex pulling)
    GPUBufferDesc megaDesc;
    megaDesc.size = MEGA_BUFFER_CAPACITY * sizeof(PackedQuad);
    megaDesc.bind_flags = BindFlag::SHADER_RESOURCE;
    megaDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
    megaDesc.stride = sizeof(PackedQuad);
    megaDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&megaDesc, nullptr, &megaQuadBuffer_);

    // Create chunk info buffer (SRV for VS chunk lookup)
    GPUBufferDesc infoDesc;
    infoDesc.size = MAX_CHUNKS * sizeof(GPUChunkInfo);
    infoDesc.bind_flags = BindFlag::SHADER_RESOURCE;
    infoDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
    infoDesc.stride = sizeof(GPUChunkInfo);
    infoDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&infoDesc, nullptr, &chunkInfoBuffer_);

    // Create indirect args buffer (for DrawInstancedIndirectCount, up to 6 draws per chunk)
    // UAV bind flag needed for GPU cull compute shader to write args
    GPUBufferDesc argsDesc;
    argsDesc.size = MAX_DRAWS * sizeof(IndirectDrawArgs);
    argsDesc.bind_flags = BindFlag::UNORDERED_ACCESS;
    argsDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED | ResourceMiscFlag::INDIRECT_ARGS;
    argsDesc.stride = sizeof(IndirectDrawArgs);
    argsDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&argsDesc, nullptr, &indirectArgsBuffer_);

    // Create draw count buffer (single uint32, raw for RWByteAddressBuffer)
    // UAV bind flag needed for GPU cull compute shader atomic counter
    GPUBufferDesc countDesc;
    countDesc.size = sizeof(uint32_t);
    countDesc.bind_flags = BindFlag::UNORDERED_ACCESS;
    countDesc.misc_flags = ResourceMiscFlag::BUFFER_RAW | ResourceMiscFlag::INDIRECT_ARGS;
    countDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&countDesc, nullptr, &drawCountBuffer_);

    // ── GPU Timestamp Queries ──────────────────────────────────────
    GPUQueryHeapDesc queryDesc;
    queryDesc.type = GpuQueryType::TIMESTAMP;
    queryDesc.query_count = TS_COUNT;
    device_->CreateQueryHeap(&queryDesc, &timestampHeap_);

    GPUBufferDesc readbackDesc;
    readbackDesc.size = TS_COUNT * sizeof(uint64_t);
    readbackDesc.usage = Usage::READBACK;
    device_->CreateBuffer(&readbackDesc, nullptr, &timestampReadback_);

    // ── GPU Compute Mesher resources ─────────────────────────────
    wi::renderer::LoadShader(ShaderStage::CS, meshShader_, "voxel/voxelMeshCS.cso");
    gpuMesherAvailable_ = meshShader_.IsValid();

    if (gpuMesherAvailable_) {
        // Voxel data buffer: 1 chunk's worth (32^3 voxels / 2 per uint = 16384 uint)
        GPUBufferDesc voxDesc;
        voxDesc.size = (CHUNK_VOLUME / 2) * sizeof(uint32_t);
        voxDesc.bind_flags = BindFlag::SHADER_RESOURCE;
        voxDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
        voxDesc.stride = sizeof(uint32_t);
        voxDesc.usage = Usage::DEFAULT;
        device_->CreateBuffer(&voxDesc, nullptr, &voxelDataBuffer_);

        // GPU quad output: same capacity as mega-buffer
        GPUBufferDesc gpuQDesc;
        gpuQDesc.size = MEGA_BUFFER_CAPACITY * sizeof(uint64_t); // PackedQuad = 8 bytes
        gpuQDesc.bind_flags = BindFlag::UNORDERED_ACCESS;
        gpuQDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED;
        gpuQDesc.stride = sizeof(uint64_t); // uint2 = 8 bytes
        gpuQDesc.usage = Usage::DEFAULT;
        device_->CreateBuffer(&gpuQDesc, nullptr, &gpuQuadBuffer_);

        // Quad counter
        GPUBufferDesc cntDesc;
        cntDesc.size = sizeof(uint32_t);
        cntDesc.bind_flags = BindFlag::UNORDERED_ACCESS;
        cntDesc.misc_flags = ResourceMiscFlag::BUFFER_RAW;
        cntDesc.usage = Usage::DEFAULT;
        device_->CreateBuffer(&cntDesc, nullptr, &gpuQuadCounter_);

        wi::backlog::post("VoxelRenderer: GPU compute mesher available");
    } else {
        wi::backlog::post("VoxelRenderer: GPU compute mesher not available", wi::backlog::LogLevel::Warning);
    }

    cpuMegaQuads_.reserve(MEGA_BUFFER_CAPACITY);
    cpuChunkInfo_.reserve(MAX_CHUNKS);
    chunkSlots_.reserve(MAX_CHUNKS);
    // One entry per visible face group (up to 6 per chunk), so size this like
    // the GPU-side indirect args buffer rather than like the chunk list.
    cpuIndirectArgs_.reserve(MAX_DRAWS);

    initialized_ = true;
    wi::backlog::post("VoxelRenderer: initialized (mega-buffer: "
        + std::to_string(MEGA_BUFFER_CAPACITY) + " quads capacity)");
}

// Drops all CPU-side chunk data and marks the renderer uninitialized.
// The counters are reset together with the containers they describe so that
// a later initialize() + render() cannot index into the now-empty chunk
// lists; megaBufferDirty_ forces the next updateMeshes() to repack.
void VoxelRenderer::shutdown() {
    chunkSlots_.clear();
    cpuChunkInfo_.clear();
    cpuMegaQuads_.clear();
    cpuIndirectArgs_.clear();
    chunkCount_ = 0;
    totalQuads_ = 0;
    megaBufferDirty_ = true;
    initialized_ = false;
}

// Creates the per-frame constant buffer, the sampler, loads the shaders and
// builds the graphics pipeline state. On shader load failure pso_ is left
// untouched, which initialize() detects via pso_.IsValid().
void VoxelRenderer::createPipeline() {
    // Constant buffer for per-frame data
    GPUBufferDesc cbDesc;
    cbDesc.size = sizeof(VoxelConstants);
    cbDesc.bind_flags = BindFlag::CONSTANT_BUFFER;
    cbDesc.usage = Usage::DEFAULT;
    device_->CreateBuffer(&cbDesc, nullptr, &constantBuffer_);

    // Anisotropic wrap sampler
    SamplerDesc samplerDesc;
    samplerDesc.filter = Filter::ANISOTROPIC;
    samplerDesc.address_u = TextureAddressMode::WRAP;
    samplerDesc.address_v = TextureAddressMode::WRAP;
    samplerDesc.address_w = TextureAddressMode::WRAP;
    samplerDesc.max_anisotropy = 16;
    device_->CreateSampler(&samplerDesc, &sampler_);

    // Load shaders
    wi::renderer::LoadShader(ShaderStage::VS, vertexShader_, "voxel/voxelVS.cso");
    wi::renderer::LoadShader(ShaderStage::PS, pixelShader_, "voxel/voxelPS.cso");
    wi::renderer::LoadShader(ShaderStage::CS, cullShader_, "voxel/voxelCullCS.cso");

    if (!vertexShader_.IsValid() || !pixelShader_.IsValid()) {
        wi::backlog::post("VoxelRenderer: shader loading failed", wi::backlog::LogLevel::Error);
        return;
    }

    // GPU cull shader loads but MDI path is disabled pending barrier debugging.
    // CPU fallback with per-face-group DrawInstanced + backface culling is used instead.
    gpuCullingEnabled_ = false;
    if (cullShader_.IsValid()) {
        wi::backlog::post("VoxelRenderer: cull compute shader compiled (GPU cull path disabled, using CPU fallback)");
    } else {
        wi::backlog::post("VoxelRenderer: cull compute shader not available", wi::backlog::LogLevel::Warning);
    }

    // Pipeline: backface cull, depth test, opaque blend, triangle list
    PipelineStateDesc psoDesc;
    psoDesc.vs = &vertexShader_;
    psoDesc.ps = &pixelShader_;
    psoDesc.rs = wi::renderer::GetRasterizerState(wi::enums::RSTYPE_FRONT);
    psoDesc.dss = wi::renderer::GetDepthStencilState(wi::enums::DSSTYPE_DEFAULT);
    psoDesc.bs = wi::renderer::GetBlendState(wi::enums::BSTYPE_OPAQUE);
    psoDesc.pt = PrimitiveTopology::TRIANGLELIST;
    device_->CreatePipelineState(&psoDesc, &pso_);
}

// ── Procedural texture generation ───────────────────────────────

// Fills a w*h RGBA8 image (4 bytes per pixel, row-major) with a pseudo-random
// blend between colour (r0,g0,b0) and colour (r1,g1,b1). The blend factor
// mixes LCG noise (60%) with a sin/cos pattern (40%); alpha is always 255.
// The output is fully determined by the arguments, including `seed`.
static void generateNoiseTexture(uint8_t* pixels, int w, int h,
                                 uint8_t r0, uint8_t g0, uint8_t b0,
                                 uint8_t r1, uint8_t g1, uint8_t b1,
                                 uint32_t seed) {
    uint32_t s = seed;
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
            // Linear congruential generator step
            s = s * 1664525u + 1013904223u;
            float noise = static_cast<float>(s & 0xFFFF) / 65535.0f;

            float fx = static_cast<float>(x) / w;
            float fy = static_cast<float>(y) / h;
            float pattern = 0.5f + 0.5f * std::sin(fx * 20.0f + noise * 3.0f)
                                        * std::cos(fy * 20.0f + noise * 3.0f);
            float t = noise * 0.6f + pattern * 0.4f;

            int idx = (y * w + x) * 4;
            pixels[idx + 0] = static_cast<uint8_t>(r0 + (r1 - r0) * t);
            pixels[idx + 1] = static_cast<uint8_t>(g0 + (g1 - g0) * t);
            pixels[idx + 2] = static_cast<uint8_t>(b0 + (b1 - b0) * t);
            pixels[idx + 3] = 255;
        }
    }
}

// Builds a 5-slice 256x256 RGBA8 texture array (one slice per material) from
// procedural noise and uploads it into textureArray_.
void VoxelRenderer::generateTextures() {
    constexpr int TEX_SIZE = 256;
    constexpr int NUM_MATERIALS = 5;
    constexpr int SLICE_BYTES = TEX_SIZE * TEX_SIZE * 4;

    std::vector<uint8_t> allPixels(SLICE_BYTES * NUM_MATERIALS);

    struct MatColor { uint8_t r0, g0, b0, r1, g1, b1; uint32_t seed; };
    MatColor colors[NUM_MATERIALS] = {
        {  60, 140,  40,  80, 180,  60, 101 }, // Grass
        { 100,  70,  40, 140, 100,  60, 202 }, // Dirt
        { 110, 110, 105, 140, 140, 130, 303 }, // Stone
        { 200, 190, 140, 230, 220, 170, 404 }, // Sand
        { 220, 225, 230, 245, 248, 252, 505 }, // Snow
    };

    for (int i = 0; i < NUM_MATERIALS; i++) {
        auto& c = colors[i];
        generateNoiseTexture(
            allPixels.data() + i * SLICE_BYTES,
            TEX_SIZE, TEX_SIZE,
            c.r0, c.g0, c.b0, c.r1, c.g1, c.b1, c.seed
        );
    }

    TextureDesc texDesc;
    texDesc.type = TextureDesc::Type::TEXTURE_2D;
    texDesc.width = TEX_SIZE;
    texDesc.height = TEX_SIZE;
    texDesc.array_size = NUM_MATERIALS;
    texDesc.mip_levels = 1;
    texDesc.format = Format::R8G8B8A8_UNORM;
    texDesc.bind_flags = BindFlag::SHADER_RESOURCE;
    texDesc.usage = Usage::DEFAULT;

    // One subresource per array slice, each pointing into allPixels
    std::vector<SubresourceData> subData(NUM_MATERIALS);
    for (int i = 0; i < NUM_MATERIALS; i++) {
        subData[i].data_ptr = allPixels.data() + i * SLICE_BYTES;
        subData[i].row_pitch = TEX_SIZE * 4;
        subData[i].slice_pitch = SLICE_BYTES;
    }

    device_->CreateTexture(&texDesc, subData.data(), &textureArray_);
}

// ── Mega-buffer rebuild ─────────────────────────────────────────
// Packs all chunk quads contiguously into a single buffer.
// Simple strategy: full rebuild whenever any chunk is dirty.
//
// Chunks that would not fit are skipped: either because the quad total would
// exceed MEGA_BUFFER_CAPACITY, or because MAX_CHUNKS entries are already
// packed (chunkInfoBuffer_ is sized for MAX_CHUNKS entries).
void VoxelRenderer::rebuildMegaBuffer(VoxelWorld& world) {
    cpuMegaQuads_.clear();
    chunkSlots_.clear();
    cpuChunkInfo_.clear();

    uint32_t offset = 0;
    float debugFlag = debugFaceColors_ ? 1.0f : 0.0f;

    world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) {
        if (chunk.quadCount == 0) return;
        if (chunkSlots_.size() >= MAX_CHUNKS) return;                      // chunk table full
        if (offset + chunk.quadCount > MEGA_BUFFER_CAPACITY) return;       // overflow guard

        ChunkSlot slot;
        slot.pos = pos;
        slot.quadOffset = offset;
        slot.quadCount = chunk.quadCount;
        chunkSlots_.push_back(slot);

        GPUChunkInfo info = {};
        // w carries the debug-face-colour flag alongside the chunk origin
        info.worldPos = XMFLOAT4(
            static_cast<float>(pos.x * CHUNK_SIZE),
            static_cast<float>(pos.y * CHUNK_SIZE),
            static_cast<float>(pos.z * CHUNK_SIZE),
            debugFlag
        );
        info.quadOffset = offset;
        info.quadCount = chunk.quadCount;
        for (int f = 0; f < 6; f++) {
            info.faceOffsets[f] = chunk.faceOffsets[f];
            info.faceCounts[f] = chunk.faceCounts[f];
        }
        cpuChunkInfo_.push_back(info);

        // NOTE(review): assumes chunk.quads.size() == chunk.quadCount; confirm in VoxelMesher.
        cpuMegaQuads_.insert(cpuMegaQuads_.end(), chunk.quads.begin(), chunk.quads.end());
        offset += chunk.quadCount;
    });

    chunkCount_ = static_cast<uint32_t>(chunkSlots_.size());
    totalQuads_ = offset;
}

// Re-meshes every chunk flagged dirty and, if anything changed (or a rebuild
// was requested through megaBufferDirty_), repacks the CPU-side mega-buffer.
void VoxelRenderer::updateMeshes(VoxelWorld& world) {
    if (!device_) return;

    // Re-mesh dirty chunks
    bool anyDirty = false;
    world.forEachChunk([&](const ChunkPos&, Chunk& chunk) {
        if (chunk.dirty) {
            VoxelMesher::meshChunk(chunk, world);
            anyDirty = true;
        }
    });

    if (anyDirty || megaBufferDirty_) {
        rebuildMegaBuffer(world);
        megaBufferDirty_ = false;
    }
}

// ── Frustum plane extraction (Gribb-Hartmann method) ────────────

// Writes the six planes (left, right, bottom, top, near, far) derived from
// the view-projection matrix into `planes`, each normalized by the length of
// its xyz part (planes with near-zero length are left unnormalized).
static void extractFrustumPlanes(const XMMATRIX& vp, XMFLOAT4 planes[6]) {
    XMFLOAT4X4 m;
    XMStoreFloat4x4(&m, vp);

    // Left
    planes[0] = XMFLOAT4(m._14 + m._11, m._24 + m._21, m._34 + m._31, m._44 + m._41);
    // Right
    planes[1] = XMFLOAT4(m._14 - m._11, m._24 - m._21, m._34 - m._31, m._44 - m._41);
    // Bottom
    planes[2] = XMFLOAT4(m._14 + m._12, m._24 + m._22, m._34 + m._32, m._44 + m._42);
    // Top
    planes[3] = XMFLOAT4(m._14 - m._12, m._24 - m._22, m._34 - m._32, m._44 - m._42);
    // Near
    planes[4] = XMFLOAT4(m._13, m._23, m._33, m._43);
    // Far
    planes[5] = XMFLOAT4(m._14 - m._13, m._24 - m._23, m._34 - m._33, m._44 - m._43);

    // Normalize each plane
    for (int i = 0; i < 6; i++) {
        float len = std::sqrt(planes[i].x * planes[i].x
                            + planes[i].y * planes[i].y
                            + planes[i].z * planes[i].z);
        if (len > 0.0001f) {
            planes[i].x /= len;
            planes[i].y /= len;
            planes[i].z /= len;
            planes[i].w /= len;
        }
    }
}

// ── Render pass ─────────────────────────────────────────────────

// Records the voxel draw into `cmd`, clearing and rendering into
// `renderTarget` / `depthBuffer`. Three paths, chosen by member flags:
//   1. gpuCullingEnabled_ : compute-shader cull + one indirect-count draw
//   2. mdiEnabled_        : CPU cull, CPU-filled indirect args + one indirect-count draw
//   3. otherwise          : CPU cull + one DrawInstanced per visible face group
// Also updates the visibleChunks_ / drawCalls_ statistics.
void VoxelRenderer::render(
    CommandList cmd,
    const wi::scene::CameraComponent& camera,
    const Texture& depthBuffer,
    const Texture& renderTarget
) const {
    if (!initialized_ || chunkCount_ == 0 || !pso_.IsValid()) return;

    auto* dev = device_;

    // Upload mega-buffer and chunk info to GPU
    if (!cpuMegaQuads_.empty()) {
        dev->UpdateBuffer(&megaQuadBuffer_, cpuMegaQuads_.data(), cmd,
            cpuMegaQuads_.size() * sizeof(PackedQuad));
    }
    if (!cpuChunkInfo_.empty()) {
        dev->UpdateBuffer(&chunkInfoBuffer_, cpuChunkInfo_.data(), cmd,
            cpuChunkInfo_.size() * sizeof(GPUChunkInfo));
    }

    // Per-frame constants (with frustum planes for GPU cull shader)
    VoxelConstants cb = {};
    XMMATRIX vpMatrix = camera.GetViewProjection();
    XMStoreFloat4x4(&cb.viewProjection, vpMatrix);
    cb.cameraPosition = XMFLOAT4(camera.Eye.x, camera.Eye.y, camera.Eye.z, 1.0f);
    cb.sunDirection = XMFLOAT4(-0.5f, -0.8f, -0.3f, 0.0f);
    cb.sunColor = XMFLOAT4(1.2f, 1.1f, 0.9f, 1.0f);
    cb.chunkSize = static_cast<float>(CHUNK_SIZE);
    cb.textureTiling = 0.25f;
    cb.chunkCount = chunkCount_;
    extractFrustumPlanes(vpMatrix, cb.frustumPlanes);
    dev->UpdateBuffer(&constantBuffer_, &cb, cmd, sizeof(cb));

    // Push constant structure (must be 48 bytes = 12 x uint32, matches b999)
    struct VoxelPush {
        uint32_t chunkIndex;
        uint32_t quadOffset;
        uint32_t flags;      // bit 0: 1=MDI mode, 0=CPU mode
        uint32_t pad[9];
    };
    static_assert(sizeof(VoxelPush) == 48, "VoxelPush must stay 12 x uint32");

    visibleChunks_ = 0;
    drawCalls_ = 0;

    // Begins the render pass (clearing colour and depth) and binds viewport,
    // scissor, pipeline and all graphics resources. Shared by all three paths.
    const auto beginVoxelPass = [&]() {
        RenderPassImage rp[] = {
            RenderPassImage::RenderTarget(
                &renderTarget,
                RenderPassImage::LoadOp::CLEAR,
                RenderPassImage::StoreOp::STORE,
                ResourceState::SHADER_RESOURCE,
                ResourceState::SHADER_RESOURCE
            ),
            RenderPassImage::DepthStencil(
                &depthBuffer,
                RenderPassImage::LoadOp::CLEAR,
                RenderPassImage::StoreOp::STORE,
                ResourceState::DEPTHSTENCIL,
                ResourceState::DEPTHSTENCIL,
                ResourceState::DEPTHSTENCIL
            ),
        };
        dev->RenderPassBegin(rp, 2, cmd);

        Viewport vp;
        vp.width = static_cast<float>(renderTarget.GetDesc().width);
        vp.height = static_cast<float>(renderTarget.GetDesc().height);
        vp.min_depth = 0.0f;
        vp.max_depth = 1.0f;
        dev->BindViewports(1, &vp, cmd);

        Rect scissor = { 0, 0, static_cast<int>(vp.width), static_cast<int>(vp.height) };
        dev->BindScissorRects(1, &scissor, cmd);

        dev->BindPipelineState(&pso_, cmd);
        dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
        dev->BindResource(&megaQuadBuffer_, 0, cmd);
        dev->BindResource(&textureArray_, 1, cmd);
        dev->BindResource(&chunkInfoBuffer_, 2, cmd);
        dev->BindSampler(&sampler_, 0, cmd);
    };

    // ── GPU Cull + MDI path ────────────────────────────────────────
    if (gpuCullingEnabled_) {
        // Zero the draw count buffer (sets state to COPY_DST)
        uint32_t zero = 0;
        dev->UpdateBuffer(&drawCountBuffer_, &zero, cmd, sizeof(uint32_t));

        // Touch indirect args buffer to establish COPY_DST state
        dev->UpdateBuffer(&indirectArgsBuffer_, &zero, cmd, sizeof(uint32_t));

        // Barriers: COPY_DST → UAV for compute shader writes
        GPUBarrier preBarriers[] = {
            GPUBarrier::Buffer(&drawCountBuffer_, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS),
            GPUBarrier::Buffer(&indirectArgsBuffer_, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS),
        };
        dev->Barrier(preBarriers, 2, cmd);

        // Timestamp: cull begin
        dev->QueryEnd(&timestampHeap_, TS_CULL_BEGIN, cmd);

        // Dispatch GPU frustum + backface cull compute shader (64 chunks per group)
        dev->BindComputeShader(&cullShader_, cmd);
        dev->BindConstantBuffer(&constantBuffer_, 0, cmd);
        dev->BindResource(&chunkInfoBuffer_, 2, cmd);
        dev->BindUAV(&indirectArgsBuffer_, 0, cmd);
        dev->BindUAV(&drawCountBuffer_, 1, cmd);
        dev->Dispatch((chunkCount_ + 63) / 64, 1, 1, cmd);

        // Timestamp: cull end
        dev->QueryEnd(&timestampHeap_, TS_CULL_END, cmd);

        // Barriers: UAV → INDIRECT_ARGUMENT for DrawInstancedIndirectCount
        GPUBarrier postBarriers[] = {
            GPUBarrier::Buffer(&indirectArgsBuffer_, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT),
            GPUBarrier::Buffer(&drawCountBuffer_, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT),
        };
        dev->Barrier(postBarriers, 2, cmd);

        // ── Render pass ────────────────────────────────────────────
        beginVoxelPass();

        // Set MDI flag in push constants (VS uses binary search for chunk index).
        // Pushed after the graphics pipeline is bound, as in the CPU MDI path,
        // so the constants are not recorded against the cull compute shader.
        VoxelPush pushData = {};
        pushData.flags = 1; // MDI mode
        dev->PushConstants(&pushData, sizeof(pushData), cmd);

        // Timestamp: draw begin
        dev->QueryEnd(&timestampHeap_, TS_DRAW_BEGIN, cmd);

        // Single MDI call: GPU cull shader filled the indirect args
        dev->DrawInstancedIndirectCount(
            &indirectArgsBuffer_, 0,
            &drawCountBuffer_, 0,
            MAX_DRAWS, cmd
        );
        drawCalls_ = 1;

        // Timestamp: draw end
        dev->QueryEnd(&timestampHeap_, TS_DRAW_END, cmd);

        dev->RenderPassEnd(cmd);

        // Resolve timestamps for readback (results available next frame)
        dev->QueryResolve(&timestampHeap_, 0, TS_COUNT, &timestampReadback_, 0, cmd);

        // Read back previous frame's timestamps (persistently mapped READBACK buffer)
        const uint64_t* tsData = static_cast<const uint64_t*>(timestampReadback_.mapped_data);
        if (tsData) {
            double freq = static_cast<double>(dev->GetTimestampFrequency());
            if (freq > 0.0 && tsData[TS_CULL_END] > tsData[TS_CULL_BEGIN]) {
                gpuCullTimeMs_ = static_cast<float>(
                    static_cast<double>(tsData[TS_CULL_END] - tsData[TS_CULL_BEGIN]) / freq * 1000.0);
            }
            if (freq > 0.0 && tsData[TS_DRAW_END] > tsData[TS_DRAW_BEGIN]) {
                gpuDrawTimeMs_ = static_cast<float>(
                    static_cast<double>(tsData[TS_DRAW_END] - tsData[TS_DRAW_BEGIN]) / freq * 1000.0);
            }
        }

        // GPU cull handles visibility counting — approximate from chunkCount
        visibleChunks_ = chunkCount_; // exact count would require readback of drawCount
        return;
    }

    // ── CPU frustum + backface cull (shared by MDI and per-face paths) ──
    wi::primitive::Frustum frustum;
    frustum.Create(camera.GetViewProjection());

    // Walks every packed chunk, frustum-tests its AABB and calls
    // emit(chunkIndex, faceIndex, slot, info) for each non-empty face group
    // that is not back-facing relative to the camera. visibleChunks_ is
    // incremented once per chunk that passes the frustum test.
    // emit returns false to stop the walk early.
    const auto forEachVisibleFaceGroup = [&](auto&& emit) {
        for (uint32_t i = 0; i < chunkCount_; i++) {
            const ChunkSlot& slot = chunkSlots_[i];
            if (slot.quadCount == 0) continue;

            XMFLOAT3 aabbMin(
                static_cast<float>(slot.pos.x * CHUNK_SIZE),
                static_cast<float>(slot.pos.y * CHUNK_SIZE),
                static_cast<float>(slot.pos.z * CHUNK_SIZE)
            );
            XMFLOAT3 aabbMax(
                aabbMin.x + CHUNK_SIZE,
                aabbMin.y + CHUNK_SIZE,
                aabbMin.z + CHUNK_SIZE
            );
            wi::primitive::AABB aabb(aabbMin, aabbMax);
            if (!frustum.CheckBoxFast(aabb)) continue;

            visibleChunks_++;

            const GPUChunkInfo& info = cpuChunkInfo_[i];
            for (uint32_t f = 0; f < 6; f++) {
                if (info.faceCounts[f] == 0) continue;

                // A face group is skipped when the camera lies entirely on
                // the far side of the chunk along that face's axis.
                bool backFacing = false;
                switch (f) {
                    case 0: backFacing = (camera.Eye.x < aabbMin.x); break;
                    case 1: backFacing = (camera.Eye.x > aabbMax.x); break;
                    case 2: backFacing = (camera.Eye.y < aabbMin.y); break;
                    case 3: backFacing = (camera.Eye.y > aabbMax.y); break;
                    case 4: backFacing = (camera.Eye.z < aabbMin.z); break;
                    case 5: backFacing = (camera.Eye.z > aabbMax.z); break;
                }
                if (backFacing) continue;

                if (!emit(i, f, slot, info)) return;
            }
        }
    };

    // ── Phase 2.2: CPU-filled indirect args + MDI draw ──────────────
    if (mdiEnabled_) {
        // CPU cull: fill indirect args with visible face groups
        cpuIndirectArgs_.clear();
        uint32_t cpuDrawCount = 0;

        forEachVisibleFaceGroup(
            [&](uint32_t i, uint32_t f, const ChunkSlot&, const GPUChunkInfo& info) {
                // indirectArgsBuffer_ holds MAX_DRAWS entries; never upload more.
                if (cpuDrawCount >= MAX_DRAWS) return false;

                IndirectDrawArgs args = {};
                // Pack chunkIndex (low 16 bits) + faceIndex (high 16 bits) into push constant.
                // The shader unpacks this to look up quadOffset from GPUChunkInfo.
                // We do NOT use startVertexLocation because SV_VertexID may not include it
                // reliably in ExecuteIndirect context.
                // NOTE(review): chunk indices >= 65536 would collide with the face bits —
                // confirm MAX_CHUNKS stays below that.
                args.pushConstant = i | (f << 16);
                args.vertexCountPerInstance = info.faceCounts[f] * 6;
                args.instanceCount = 1;
                args.startVertexLocation = 0;
                args.startInstanceLocation = 0;
                cpuIndirectArgs_.push_back(args);
                cpuDrawCount++;
                return true;
            });

        // Upload indirect args and draw count to GPU
        // Note: no explicit barriers needed here. Buffers start in COMMON each frame
        // (DX12 buffer decay after command list execution). COMMON is implicitly
        // promoted to COPY_DST by UpdateBuffer, then to INDIRECT_ARGUMENT by
        // DrawInstancedIndirectCount. This matches Phase 2.1 pattern (no barriers
        // between UpdateBuffer and SRV usage for megaQuadBuffer_/chunkInfoBuffer_).
        if (!cpuIndirectArgs_.empty()) {
            dev->UpdateBuffer(&indirectArgsBuffer_, cpuIndirectArgs_.data(), cmd,
                cpuIndirectArgs_.size() * sizeof(IndirectDrawArgs));
        }
        dev->UpdateBuffer(&drawCountBuffer_, &cpuDrawCount, cmd, sizeof(uint32_t));

        // ── Render pass ────────────────────────────────────────────
        beginVoxelPass();

        // MDI mode: VS uses binary search to find chunk from SV_VertexID
        VoxelPush pushData = {};
        pushData.flags = 1; // MDI mode
        dev->PushConstants(&pushData, sizeof(pushData), cmd);

        dev->DrawInstancedIndirectCount(
            &indirectArgsBuffer_, 0,
            &drawCountBuffer_, 0,
            MAX_DRAWS, cmd
        );
        drawCalls_ = 1;

        dev->RenderPassEnd(cmd);
        return;
    }

    // ── Phase 2.1 Fallback: per-face-group DrawInstanced ────────────
    beginVoxelPass();

    forEachVisibleFaceGroup(
        [&](uint32_t i, uint32_t f, const ChunkSlot& slot, const GPUChunkInfo& info) {
            VoxelPush pushData = {};
            pushData.chunkIndex = i;
            pushData.quadOffset = slot.quadOffset + info.faceOffsets[f];
            pushData.flags = 0; // CPU mode
            dev->PushConstants(&pushData, sizeof(pushData), cmd);

            // 6 vertices (two triangles) per quad
            dev->DrawInstanced(info.faceCounts[f] * 6, 1, 0, 0, cmd);
            drawCalls_++;
            return true;
        });

    dev->RenderPassEnd(cmd);
}

// ── VoxelRenderPath (custom RenderPath3D) ───────────────────────

// Initializes the renderer, generates the world (debug layout or terrain
// around the camera), builds the initial meshes, configures post-processing
// and creates the off-screen render targets.
void VoxelRenderPath::Start() {
    RenderPath3D::Start();

    auto* device = wi::graphics::GetDevice();
    renderer.initialize(device);
    renderer.debugFaceColors_ = debugMode;

    // Generate world
    if (debugMode) {
        world.generateDebug();
        cameraPos = { 10.0f, 10.0f, 0.0f };
        cameraPitch = -0.4f;
        cameraYaw = 0.5f;
    } else {
        world.generateAround(cameraPos.x, cameraPos.y, cameraPos.z, 4);
    }

    if (renderer.isInitialized()) {
        renderer.updateMeshes(world);
    }
    worldGenerated_ = true;

    setAO(AO_DISABLED);
    setFXAAEnabled(true);
    setBloomEnabled(false);

    createRenderTargets();
}

// Creates the colour (R8G8B8A8_UNORM) and depth (D32_FLOAT) targets at the
// physical resolution, falling back to 1920x1080 when either dimension is
// reported as zero. Sets rtCreated_ and logs the outcome.
void VoxelRenderPath::createRenderTargets() {
    auto* device = wi::graphics::GetDevice();
    if (!device) return;

    uint32_t w = GetPhysicalWidth();
    uint32_t h = GetPhysicalHeight();
    if (w == 0 || h == 0) {
        w = 1920;
        h = 1080;
    }

    wi::graphics::TextureDesc rtDesc;
    rtDesc.type = wi::graphics::TextureDesc::Type::TEXTURE_2D;
    rtDesc.width = w;
    rtDesc.height = h;
    rtDesc.format = wi::graphics::Format::R8G8B8A8_UNORM;
    rtDesc.bind_flags = wi::graphics::BindFlag::RENDER_TARGET | wi::graphics::BindFlag::SHADER_RESOURCE;
    rtDesc.mip_levels = 1;
    rtDesc.sample_count = 1;
    rtDesc.layout = wi::graphics::ResourceState::SHADER_RESOURCE;
    device->CreateTexture(&rtDesc, nullptr, &voxelRT_);

    wi::graphics::TextureDesc depthDesc;
    depthDesc.type = wi::graphics::TextureDesc::Type::TEXTURE_2D;
    depthDesc.width = w;
    depthDesc.height = h;
    depthDesc.format = wi::graphics::Format::D32_FLOAT;
    depthDesc.bind_flags = wi::graphics::BindFlag::DEPTH_STENCIL | wi::graphics::BindFlag::SHADER_RESOURCE;
    depthDesc.mip_levels = 1;
    depthDesc.sample_count = 1;
    depthDesc.layout = wi::graphics::ResourceState::DEPTHSTENCIL;
    device->CreateTexture(&depthDesc, nullptr, &voxelDepth_);

    rtCreated_ = voxelRT_.IsValid() && voxelDepth_.IsValid();

    wi::backlog::post("VoxelRenderPath: render targets "
        + std::string(rtCreated_ ? "OK" : "FAILED")
        + " (" + std::to_string(w) + "x" + std::to_string(h) + ")");
}

// ── WASD camera input ───────────────────────────────────────────

static constexpr wi::input::BUTTON KEY_W =
    static_cast<wi::input::BUTTON>(wi::input::CHARACTER_RANGE_START + ('W' - 'A'));
static constexpr wi::input::BUTTON KEY_A =
    static_cast<wi::input::BUTTON>(wi::input::CHARACTER_RANGE_START + ('A' - 'A'));
static constexpr wi::input::BUTTON KEY_S =
    static_cast<wi::input::BUTTON>(wi::input::CHARACTER_RANGE_START + ('S' - 'A'));
static constexpr wi::input::BUTTON KEY_D =
    static_cast<wi::input::BUTTON>(wi::input::CHARACTER_RANGE_START + ('D' - 'A'));

// Free-fly camera: right mouse button toggles mouse capture, mouse movement
// turns the view while captured, WASD moves along the view/right vectors,
// Space/LCtrl move along world Y and LShift triples the speed.
void VoxelRenderPath::handleInput(float dt) {
    if (wi::input::Press(wi::input::MOUSE_BUTTON_RIGHT)) {
        mouseCaptured = !mouseCaptured;
        wi::input::HidePointer(mouseCaptured);
    }

    if (mouseCaptured) {
        auto mouseState = wi::input::GetMouseState();
        cameraYaw += mouseState.delta_position.x * cameraSensitivity;
        cameraPitch += mouseState.delta_position.y * cameraSensitivity;
        // Keep pitch short of straight up/down
        cameraPitch = std::clamp(cameraPitch, -1.5f, 1.5f);
    }

    float cosPitch = std::cos(cameraPitch);
    XMFLOAT3 forward(
        std::sin(cameraYaw) * cosPitch,
        -std::sin(cameraPitch),
        std::cos(cameraYaw) * cosPitch
    );
    // Strafe vector stays in the horizontal plane
    XMFLOAT3 right(std::cos(cameraYaw), 0.0f, -std::sin(cameraYaw));

    float speed = cameraSpeed * dt;
    if (wi::input::Down(wi::input::KEYBOARD_BUTTON_LSHIFT)) speed *= 3.0f;

    if (wi::input::Down(KEY_W)) {
        cameraPos.x += forward.x * speed;
        cameraPos.y += forward.y * speed;
        cameraPos.z += forward.z * speed;
    }
    if (wi::input::Down(KEY_S)) {
        cameraPos.x -= forward.x * speed;
        cameraPos.y -= forward.y * speed;
        cameraPos.z -= forward.z * speed;
    }
    if (wi::input::Down(KEY_A)) {
        cameraPos.x -= right.x * speed;
        cameraPos.z -= right.z * speed;
    }
    if (wi::input::Down(KEY_D)) {
        cameraPos.x += right.x * speed;
        cameraPos.z += right.z * speed;
    }
    if (wi::input::Down(wi::input::KEYBOARD_BUTTON_SPACE)) cameraPos.y += speed;
    if (wi::input::Down(wi::input::KEYBOARD_BUTTON_LCONTROL)) cameraPos.y -= speed;

    camera->Eye = cameraPos;
    camera->At = forward;
    camera->Up = XMFLOAT3(0, 1, 0);
    camera->UpdateCamera();
}

// Per-frame update: tracks a smoothed FPS value, processes input, re-meshes
// dirty chunks and then runs the base RenderPath3D update.
void VoxelRenderPath::Update(float dt) {
    lastDt_ = dt;
    float instantFps = (dt > 0.0f) ? (1.0f / dt) : 0.0f;
    // Exponential moving average of the frame rate
    smoothFps_ = smoothFps_ * 0.95f + instantFps * 0.05f;

    if (camera) handleInput(dt);
    if (renderer.isInitialized()) renderer.updateMeshes(world);

    RenderPath3D::Update(dt);
}

// Runs the base render path, then records the voxel pass on its own command
// list into the off-screen targets.
void VoxelRenderPath::Render() const {
    RenderPath3D::Render();

    if (renderer.isInitialized() && camera && rtCreated_) {
        auto* device = wi::graphics::GetDevice();
        CommandList cmd = device->BeginCommandList();
        renderer.render(cmd, *camera, voxelDepth_, voxelRT_);
    }
}

// Composes the final image: base composition, then the voxel render target
// drawn full-screen with opaque blending, then the text HUD with statistics.
void VoxelRenderPath::Compose(CommandList cmd) const {
    frameCount_++;

    RenderPath3D::Compose(cmd);

    if (rtCreated_ && voxelRT_.IsValid()) {
        wi::image::Params fx;
        fx.enableFullScreen();
        fx.blendFlag = wi::enums::BLENDMODE_OPAQUE;
        wi::image::Draw(&voxelRT_, fx, cmd);
    }

    // HUD overlay
    wi::font::Params fp;
    fp.posX = 10;
    fp.posY = 10;
    fp.size = 20;
    fp.color = wi::Color(255, 255, 255, 230);
    fp.shadowColor = wi::Color(0, 0, 0, 180);

    char fpsStr[16];
    snprintf(fpsStr, sizeof(fpsStr), "%.1f", smoothFps_);
    char dtStr[16];
    snprintf(dtStr, sizeof(dtStr), "%.2f", lastDt_ * 1000.0f);

    std::string stats = "BVLE Voxel Engine (Phase 2 — GPU-driven)\n";
    stats += "FPS: " + std::string(fpsStr) + " (" + std::string(dtStr) + " ms)\n";
    if (debugMode) {
        stats += "=== DEBUG FACE MODE ===\n";
        stats += "+X=Red -X=DkRed +Y=Green -Y=DkGreen +Z=Blue -Z=DkBlue\n";
    }
    stats += "Chunks: " + std::to_string(renderer.getVisibleChunks())
           + "/" + std::to_string(renderer.getChunkCount()) + "\n";
    stats += "Quads: " + std::to_string(renderer.getTotalQuads()) + "\n";

    std::string renderMode;
    if (renderer.isGpuCulling())
        renderMode = "MDI + GPU cull";
    else if (renderer.isMdiEnabled())
        renderMode = "MDI + CPU cull";
    else
        renderMode = "DrawInstanced + CPU cull + backface";
    stats += "Draw Calls: " + std::to_string(renderer.getDrawCalls())
           + " (" + renderMode + ")\n";

    char cullStr[16], drawStr[16];
    snprintf(cullStr, sizeof(cullStr), "%.3f", renderer.getGpuCullTimeMs());
    snprintf(drawStr, sizeof(drawStr), "%.3f", renderer.getGpuDrawTimeMs());
    stats += "GPU Cull: " + std::string(cullStr) + " ms | Draw: " + std::string(drawStr) + " ms\n";
    stats += "WASD+Space/Ctrl: move | Shift: fast | Right-click: capture mouse";

    wi::font::Draw(stats, fp, cmd);
}

} // namespace voxel