commit 5f346bb14a2a21b581255d49003d85ae0a12dc6a Author: Samuel Bouchet Date: Wed Mar 25 14:24:05 2026 +0100 Phase 2: GPU-driven voxel rendering pipeline Mega-buffer architecture replacing per-chunk GPU buffers: - Single StructuredBuffer for all chunks (2M quads, 16 MB) - StructuredBuffer with per-chunk metadata (position, quad offsets, face groups) - VS reads chunk info via push constants (b999) for driver-safe chunk indexing - CPU frustum culling with wi::primitive::Frustum + AABB per chunk - Quads sorted by face direction in greedy mesher (faceOffsets/faceCounts) - GPU frustum + backface cull compute shader (voxelCullCS.hlsl) - GPU binary mesher compute shader baseline (voxelMeshCS.hlsl) - Indirect draw buffers and timestamp query infrastructure - README with build instructions and project architecture diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7244c41 --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Build output +build/ +out/ +cmake-build-*/ + +# Compiled shaders (regenerated at runtime by DXC) +shaders/hlsl6/ +shaders/spirv/ + +# Wicked Engine submodule (cloned separately) +engine/ + +# IDE +.vs/ +.vscode/ +*.suo +*.user +*.sln.docstates + +# Crash dumps & logs +*.dmp +bvle_crash.log +log.txt + +# OS +Thumbs.db +Desktop.ini +.DS_Store + +# Claude Code +.claude/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..39200a8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,278 @@ +# BVLE Voxels - Prototype de Moteur Voxel Hybride + +## Vue d'ensemble + +Prototype de moteur voxel basé sur **Wicked Engine** (MIT, C++17, DX12/Vulkan) pour valider les performances de rendu sur GPU moderne (AMD RDNA 2+ / Nvidia RTX 3060+). Le document de spécification complet est dans `voxel_engine_spec.docx` à la racine du projet. + +Cible : 60+ fps en 1440p, monde de 512x512x256 voxels visibles. 
+ +## Architecture + +``` +bvle-voxels/ +├── CMakeLists.txt # Build CMake racine +├── engine/ # Wicked Engine (clone --depth 1, branche main) +│ └── WickedEngine/shaders/voxel/ # Nos shaders copiés ici pour compilation DXC +├── src/ +│ ├── voxel/ # Bibliothèque VoxelEngine (static lib) +│ │ ├── VoxelTypes.h # Types fondamentaux (VoxelData, PackedQuad, MaterialDesc, ChunkPos) +│ │ ├── VoxelWorld.h/.cpp # Monde voxel (hashmap de chunks, génération procédurale) +│ │ ├── VoxelMesher.h/.cpp # Binary Greedy Mesher CPU +│ │ └── VoxelRenderer.h/.cpp# Renderer + VoxelRenderPath (sous-classe RenderPath3D) +│ └── app/ +│ └── main.cpp # Point d'entrée Win32 + crash handler SEH +├── shaders/ # Sources HLSL des shaders voxel (copiés dans engine/ au build) +│ ├── voxelCommon.hlsli # Root signature et CB partagés (inclus par VS et PS) +│ ├── voxelVS.hlsl # Vertex shader (vertex pulling) +│ └── voxelPS.hlsl # Pixel shader (triplanar + lighting) +└── CLAUDE.md +``` + +## Build + +### Prérequis + +- CMake 3.19+ (`winget install Kitware.CMake`) +- Visual Studio 2022 Build Tools (`winget install Microsoft.VisualStudio.2022.BuildTools`) +- Windows SDK 10.0.26100+ (`winget install Microsoft.WindowsSDK.10.0.26100`) + +### Commandes + +```bash +# Configurer (depuis la racine du projet) +cmake -B build -G "Visual Studio 17 2022" -A x64 -DCMAKE_SYSTEM_VERSION=10.0.26100.0 + +# Compiler +cmake --build build --config Release --target BVLEVoxels --parallel + +# Exécutable produit dans build/Release/BVLEVoxels.exe +``` + +Le SDK 10.0.26100 est requis car les headers DX12 (`d3dx12_check_feature_support.h`) fournis par Wicked Engine ne sont pas compatibles avec le SDK 22621. + +### Post-build automatique (CMakeLists.txt) + +Le build copie automatiquement : +1. `dxcompiler.dll` → à côté de l'exe (requis pour la compilation runtime des shaders) +2. `shaders/*.hlsl` → `engine/WickedEngine/shaders/voxel/` (pour que `LoadShader` les trouve via `SHADERSOURCEPATH`) +3. 
`engine/Content/` → à côté de l'exe (assets Wicked Engine) + +## Intégration Wicked Engine + +### Backend graphique + +Wicked Engine utilise **DX12 par défaut sur Windows**, Vulkan sur Linux. Les shaders sont écrits en **HLSL** et compilés via DXC vers : +- `shaders/hlsl6/*.cso` pour DX12 +- `shaders/spirv/*.spv` pour Vulkan + +Pour forcer Vulkan sur Windows, passer `"vulkan"` en argument de ligne de commande. + +### Point d'entrée et architecture de rendu + +`VoxelRenderPath` hérite de `wi::RenderPath3D`. **IMPORTANT** : le rendu voxel utilise ses propres render targets (`voxelRT_`, `voxelDepth_`) et est exécuté dans `Render()` sur un **command list dédié** (`device->BeginCommandList()`). Le résultat est ensuite composité dans `Compose()` via `wi::image::Draw()`. + +**NE JAMAIS créer un render pass dans `Compose()`** : cette méthode est appelée à l'intérieur du render pass du swapchain. Imbriquer des render passes est interdit en D3D12 (cause `DXGI_ERROR_INVALID_CALL → device removed`). + +Architecture correcte : +``` +Render() → RenderPath3D::Render() // Wicked rend sa scène + → device->BeginCommandList() // Nouveau cmd list + → renderer.render(cmd, ...) // Notre render pass (clear + draw voxels → voxelRT_) +Compose() → RenderPath3D::Compose() // Wicked affiche son résultat + → wi::image::Draw(voxelRT_) // On overlay nos voxels par-dessus +``` + +La caméra est gérée manuellement dans `Update()` en écrivant directement `camera->Eye`, `camera->At` (direction LookTo), `camera->Up`. + +### APIs Wicked utilisées + +| Besoin | API Wicked | +|--------|-----------| +| Clavier WASD | `wi::input::Down(CHARACTER_RANGE_START + offset)` (pas de `KEYBOARD_BUTTON_W`) | +| Souris delta | `wi::input::GetMouseState().delta_position` | +| Cacher curseur | `wi::input::HidePointer(bool)` | +| Shader loading | `wi::renderer::LoadShader()` - compile auto les .hlsl en .cso si absent | +| PSO states | `wi::renderer::GetRasterizerState()` etc. 
retournent des pointeurs (pas besoin de `&`) | +| Render pass | `RenderPassImage::RenderTarget(texture, loadOp, storeOp, layoutBefore, layoutAfter, subresource=-1)` | +| Font overlay | `wi::font::Params` est un struct - setter les membres un par un | +| Camera | `CameraComponent::At` est une **direction** (utilisé avec `XMMatrixLookToLH`), pas un point cible | +| Buffer create | `device->CreateBuffer(desc, raw_data_ptr, buffer)` — PAS de `SubresourceData` pour les buffers ! | +| Texture create | `device->CreateTexture(desc, subresourceData_ptr, texture)` — utilise `SubresourceData*` (différent de CreateBuffer) | +| Buffer update | `device->UpdateBuffer(buffer, data, cmd, size, offset)` | +| Push constants | `device->PushConstants(data, size, cmd)` — mappés à `register(b999)`, taille fixe 48 bytes (12 × uint32) | +| Command list | `device->BeginCommandList()` — nouveau cmd list pour render passes séparés | +| Render pass | NE JAMAIS imbriquer ! Un seul render pass actif par command list | +| Debug DX12 | Passer `"debugdevice"` en argument pour activer la couche de debug D3D12 | +| Logging | `wi::backlog::post(message, logLevel)` — préférer au logging fichier | + +### Shaders custom — PIÈGES IMPORTANTS + +Les shaders custom doivent respecter le **binding model de Wicked Engine** : + +1. **Root signature obligatoire** : chaque shader DOIT avoir une root signature DX12 intégrée, soit via `#include "globals.hlsli"` (auto), soit via `[RootSignature(MACRO)]` sur le entry point. + +2. **Root signature Wicked** (HLSL 6.6+) : + - `b999` → push constants (12 × uint32 = 48 bytes max) + - `b0, b1, b2` → CBV root descriptors + - `t0-t15, u0-u15` → dans une descriptor table partagée + - `s0-s7` → samplers dynamiques + - `s100-s109` → static samplers (linear, point, aniso, etc.) + +3. 
**Chemins des shaders** : + - `SHADERPATH` = `/shaders/hlsl6/` — où les `.cso` compilés sont stockés + - `SHADERSOURCEPATH` = `../../engine/WickedEngine/shaders/` — où les `.hlsl` sources sont cherchés + - Les shaders custom doivent être copiés dans `SHADERSOURCEPATH` (sous-dossier `voxel/`) + - `LoadShader(stage, shader, "voxel/voxelVS.cso")` → compile `SHADERSOURCEPATH/voxel/voxelVS.hlsl` si `.cso` absent + +4. **`dxcompiler.dll` doit être à côté de l'exe** sinon la compilation runtime échoue silencieusement. + +5. **CreateBuffer prend `void*`**, pas `SubresourceData*`. L'API texture (`CreateTexture`) prend bien `SubresourceData*`. + +6. **Winding des triangles — PIÈGE MAJEUR** : + + Wicked Engine utilise `front_counter_clockwise = true` + `CullMode::BACK` (state `RSTYPE_FRONT`). Malgré cela, les quads voxel doivent utiliser un winding **CW** (clockwise) comme défaut, pas CCW. Confirmé empiriquement via `SV_IsFrontFace` : avec des corners CCW standard, DX12 voit tous les triangles comme **back-facing**. + + La règle pour nos tangent axes U/V : + - `cross(U,V) = N` (faces +X, -Y, +Z) → corners **CW** pour être front-facing + - `cross(U,V) ≠ N` (faces -X, +Y, -Z) → corners **CCW** pour être front-facing + + ``` + CW corners: (0,0)(0,1)(1,0), (1,0)(0,1)(1,1) ← défaut + CCW corners: (0,0)(1,0)(0,1), (0,1)(1,0)(1,1) ← faces 1,2,5 + ``` + +### Diagnostics et debugging + +**Crash handler SEH** (`main.cpp`) : `SetUnhandledExceptionFilter` écrit : +- `bvle_crash.log` : stack trace avec symboles + adresses +- `bvle_crash.dmp` : minidump analysable avec Visual Studio +- Nécessite `dbghelp.lib` et build avec symbols (`RelWithDebInfo` ou `Debug`) + +**D3D12 Debug Layer** : lancer avec `BVLEVoxels.exe debugdevice` pour activer. Active aussi DRED (Device Removed Extended Data) pour diagnostiquer les GPU hangs. 
+ +**Erreurs GPU courantes** : +- `DXGI_ERROR_INVALID_CALL` → render pass imbriqué ou resource state invalide +- `DXGI_ERROR_DEVICE_HUNG` → shader en boucle infinie ou accès mémoire hors limites +- Dialog bloquant avec `messageBox` → vient de `wi::helper::messageBox()`, ne pas confondre avec un crash + +**Backlog Wicked** : `wi::backlog::SetLogFile("bvle_backlog.txt")` redirige les logs vers un fichier. Touche `~` (tilde) pour toggler la console à l'écran. + +**Mode debug face-color** : lancer avec `BVLEVoxels.exe debug` pour activer. Génère un monde de test (blocs isolés) et colore chaque face selon sa direction : +- Bright Red / Dark Red = +X / -X +- Bright Green / Dark Green = +Y / -Y +- Bright Blue / Dark Blue = +Z / -Z + +## Détails d'implémentation + +### VoxelData (16 bits) + +``` +[15:8] material ID (256 matériaux) +[7:4] flags (smooth, transparent, emissive, custom) +[3:0] metadata (orientation, variant) +``` + +### PackedQuad (64 bits = 8 octets par quad) + +``` +[5:0] position X (0-63) +[11:6] position Y (0-63) +[17:12] position Z (0-63) +[23:18] width (1-32) +[29:24] height (1-32) +[32:30] face (0-5 : +X,-X,+Y,-Y,+Z,-Z) +[40:33] material ID +[48:41] AO (4x2 bits par coin) +[63:49] flags (réservés) +``` + +### Binary Greedy Mesher (CPU, `VoxelMesher.cpp`) + +1. **Masques binaires** : pour chaque axe (X,Y,Z), `solid[u][v]` = bitmask 32 bits de voxels solides +2. **Face culling** : `visible = solid & ~(solid >> 1)` pour faces positives (shift adapté par direction), avec lookup cross-chunk aux frontières +3. 
**Greedy merge** : par tranche de profondeur, grille 2D de material IDs, expansion rectangulaire maximale (largeur puis hauteur) + +### Génération procédurale (`VoxelWorld.cpp`) + +- Perlin noise 3D (permutation-based, seed configurable) +- fBm 5 octaves pour le heightmap +- Caves : `|fbm(x,y,z)| < threshold` en 3D +- Matériaux par altitude : sable < 25, herbe 25-70, pierre 70-90, neige > 90 +- Chunks générés en Y = 0..7 (hauteur max 256 blocs) + +### Renderer (`VoxelRenderer.cpp`) + +- **Vertex pulling** : pas de vertex buffer classique, le VS lit un `StructuredBuffer` via `SV_VertexID` +- **Pipeline** : PSO avec `RSTYPE_FRONT` (backface cull), `DSSTYPE_DEFAULT` (depth test), `BSTYPE_OPAQUE` +- **Per-chunk** : push constants (b999, 48 bytes) pour la position monde du chunk, bind du quad buffer en `t0` +- **Textures** : texture array 2D (256x256, 5 layers) générée procéduralement, triplanar mapping dans le PS +- **Culling** : distance-based simple (512 blocs), pas de frustum culling GPU +- **Render targets propres** : `voxelRT_` (R8G8B8A8) + `voxelDepth_` (D32_FLOAT), rendu dans `Render()` sur cmd list dédié +- **Composition** : overlay sur le swapchain via `wi::image::Draw()` dans `Compose()` +- **Stats overlay** : affichage HUD des chunks/quads/draw calls via `wi::font::Draw` + +## Phases de développement (spec) + +### Phase 1 - Setup et meshing de base [FAIT] + +- Fork Wicked Engine, structure de modules +- VoxelWorld avec génération procédurale Perlin (rayon 4 chunks = ~150 chunks) +- Binary Greedy Mesher CPU (~300K quads pour le monde initial) +- Rendu basique avec vertex pulling et texture array +- Caméra libre de navigation (WASD + souris) +- Crash handler SEH avec stack trace symbolique + +### Phase 2 - Performance GPU [A FAIRE] + +- Porter le mesher en compute shader +- MultiDrawIndirect (un seul draw call pour tous les chunks) +- Frustum culling GPU + indirect args +- Backface culling par orientation (6 groupes de faces) +- Benchmark CPU vs GPU mesher + 
+### Phase 3 - Texture blending [A FAIRE] + +- Triplanar mapping (déjà en place, à affiner) +- Height-based blending aux frontières de matériaux +- Heightmaps dans le canal alpha ou texture séparée +- Neighbor material ID dans le vertex format (8 bits dans les flags réservés) + +### Phase 4 - Toping [A FAIRE] + +- TopingSystem avec bitmask d'adjacence 4 bits (16 variantes) +- Instance buffer GPU par chunk +- Instanced draw dans le G-buffer +- 2-3 types de test (rebord de pierre, bordure d'herbe) + +### Phase 5 - Rendu smooth [A FAIRE] + +- Surface Nets (ou Marching Cubes) en compute shader +- Flag `smooth` dans VoxelData +- Coexistence blocky/smooth dans le même chunk +- Buffer séparé pour les triangles smooth + +### Phase 6 - Ray tracing hybride [A FAIRE] + +- BLAS par chunk (depuis le mesh greedy), TLAS par frame +- RT Shadows via ray queries (compute shader) +- RT AO (4-8 rayons, courte portée) +- Fallback shadow maps / SSAO si RT non disponible + +## Métriques cibles + +| Métrique | Cible | +|----------|-------| +| FPS 1440p | > 60 fps, monde 512x512x128 | +| Meshing GPU | < 200 us par chunk 32^3 | +| Re-mesh | < 1 frame (16ms) pour 1 chunk | +| Mémoire GPU | < 500 Mo pour 512x512x128 | +| RT shadows + AO | < 4ms en 1440p | +| Draw calls | < 100 (hors post-process) | + +## Conventions + +- Namespaces : tout le code voxel est dans `namespace voxel` +- Chunks : 32x32x32, configurable via `CHUNK_SIZE` +- Coordonnées : Y = haut, monde infini en X/Z, hashmap sparse +- Matériaux : palette de 256, index 0 = air (vide) +- Faces : 0=+X, 1=-X, 2=+Y, 3=-Y, 4=+Z, 5=-Z diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..0d75d5c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.19) +project(BVLEVoxels LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# Wicked Engine options - disable what we don't need +set(WICKED_EDITOR OFF CACHE BOOL "" FORCE) 
+set(WICKED_TESTS OFF CACHE BOOL "" FORCE) +set(WICKED_IMGUI_EXAMPLE OFF CACHE BOOL "" FORCE) +set(WICKED_WINDOWS_TEMPLATE OFF CACHE BOOL "" FORCE) +set(WICKED_LINUX_TEMPLATE OFF CACHE BOOL "" FORCE) +set(WICKED_ENABLE_SYMLINKS OFF CACHE BOOL "" FORCE) + +add_subdirectory(engine) + +# ── Voxel Engine Library ────────────────────────────────────────── +file(GLOB_RECURSE VOXEL_SOURCES src/voxel/*.cpp src/voxel/*.h) +add_library(VoxelEngine STATIC ${VOXEL_SOURCES}) +target_include_directories(VoxelEngine PUBLIC src) +target_link_libraries(VoxelEngine PUBLIC WickedEngine) + +# ── Main Application ───────────────────────────────────────────── +file(GLOB APP_SOURCES src/app/*.cpp src/app/*.h) +add_executable(BVLEVoxels WIN32 ${APP_SOURCES}) +target_link_libraries(BVLEVoxels PRIVATE VoxelEngine WickedEngine) + +# Copy Content directory (shaders, etc.) to build output +add_custom_command(TARGET BVLEVoxels POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${CMAKE_SOURCE_DIR}/engine/Content + $<TARGET_FILE_DIR:BVLEVoxels>/Content +) + +# Copy DXC shader compiler DLL next to the exe (required for runtime shader compilation) +add_custom_command(TARGET BVLEVoxels POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_SOURCE_DIR}/engine/WickedEngine/dxcompiler.dll + $<TARGET_FILE_DIR:BVLEVoxels>/dxcompiler.dll + COMMENT "Copying DXC shader compiler DLL" +) + +# Copy our custom shader sources into Wicked's shader source tree +# so LoadShader can find and compile them as "voxel/voxelVS.cso" +add_custom_command(TARGET BVLEVoxels POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${CMAKE_SOURCE_DIR}/shaders + ${CMAKE_SOURCE_DIR}/engine/WickedEngine/shaders/voxel + COMMENT "Copying voxel shaders to Wicked Engine shader source directory" +) diff --git a/README.md b/README.md new file mode 100644 index 0000000..b5faea3 --- /dev/null +++ b/README.md @@ -0,0 +1,120 @@ +# BVLE Voxels + +Prototype de moteur voxel hybride basé sur [Wicked Engine](https://github.com/turanszkij/WickedEngine) (MIT, C++17, DX12/Vulkan). 
+ +Cible : **60+ fps en 1440p**, monde de 512×512×256 voxels visibles. + +![Phase 2](https://img.shields.io/badge/Phase-2%20GPU--driven-blue) + +## Prérequis + +| Outil | Version | Installation | +|-------|---------|-------------| +| **Windows** | 10/11 (x64) | — | +| **CMake** | 3.19+ | `winget install Kitware.CMake` | +| **Visual Studio 2022 Build Tools** | 17.x | `winget install Microsoft.VisualStudio.2022.BuildTools` | +| **Windows SDK** | 10.0.26100+ | `winget install Microsoft.WindowsSDK.10.0.26100` | +| **GPU** | DX12 feature level 12.0+ | AMD RDNA 2+ / Nvidia RTX 3060+ recommandé | + +> Le SDK **10.0.26100** est requis car les headers DX12 fournis par Wicked Engine ne sont pas compatibles avec le SDK 22621. + +## Installation + +```bash +# 1. Cloner le dépôt +git clone <url-du-dépôt> bvle-voxels +cd bvle-voxels + +# 2. Cloner Wicked Engine dans engine/ +git clone --depth 1 https://github.com/turanszkij/WickedEngine.git engine + +# 3. Configurer CMake +cmake -B build -G "Visual Studio 17 2022" -A x64 -DCMAKE_SYSTEM_VERSION=10.0.26100.0 + +# 4. Compiler +cmake --build build --config Release --target BVLEVoxels --parallel + +# 5. Lancer +./build/Release/BVLEVoxels.exe +``` + +## Commandes de lancement + +| Commande | Description | +|----------|-------------| +| `BVLEVoxels.exe` | Mode normal (monde procédural, rayon 4 chunks) | +| `BVLEVoxels.exe debug` | Mode debug face-color (+X=Rouge, -X=Rouge sombre, etc.) 
| +| `BVLEVoxels.exe debugdevice` | Active la couche de debug D3D12 | +| `BVLEVoxels.exe vulkan` | Force le backend Vulkan | + +## Contrôles + +| Touche | Action | +|--------|--------| +| **WASD** | Déplacement caméra | +| **Espace / Ctrl** | Monter / Descendre | +| **Shift** | Vitesse ×3 | +| **Clic droit** | Capturer/libérer la souris | +| **~** (tilde) | Console Wicked Engine | + +## Architecture + +``` +bvle-voxels/ +├── CMakeLists.txt # Build CMake +├── engine/ # Wicked Engine (git clone --depth 1) +├── src/ +│ ├── voxel/ # Bibliothèque VoxelEngine (static lib) +│ │ ├── VoxelTypes.h # Types (VoxelData, PackedQuad, ChunkPos) +│ │ ├── VoxelWorld.h/.cpp # Monde voxel (hashmap, génération Perlin) +│ │ ├── VoxelMesher.h/.cpp # Binary Greedy Mesher CPU +│ │ └── VoxelRenderer.h/.cpp# Renderer GPU-driven + VoxelRenderPath +│ └── app/ +│ └── main.cpp # Point d'entrée Win32 +├── shaders/ # Sources HLSL +│ ├── voxelCommon.hlsli # Root signature, CB, structs partagés +│ ├── voxelVS.hlsl # Vertex shader (vertex pulling) +│ ├── voxelPS.hlsl # Pixel shader (triplanar + lighting) +│ ├── voxelCullCS.hlsl # Compute: frustum + backface culling +│ └── voxelMeshCS.hlsl # Compute: GPU mesher (binary, baseline) +└── voxel_engine_spec.docx # Document de spécification complet +``` + +## Pipeline de rendu (Phase 2 — GPU-driven) + +``` +CPU: mesh dirty chunks (greedy merge) → pack quads + chunkInfo dans mega-buffers → upload GPU +GPU: frustum cull compute → indirect args → DrawInstancedIndirectCount (1 appel = N draws) +``` + +**Buffers GPU :** + +| Buffer | Type | Slot | Rôle | +|--------|------|------|------| +| `megaQuadBuffer_` | StructuredBuffer\<PackedQuad\> | SRV t0 | 2M quads max (16 MB) | +| `chunkInfoBuffer_` | StructuredBuffer\<GPUChunkInfo\> | SRV t2 | 2048 chunks max | +| `indirectArgsBuffer_` | RWStructuredBuffer | UAV u0 | Indirect draw args | +| `drawCountBuffer_` | RWByteAddressBuffer | UAV u1 | Compteur atomique de draws | + +**Caractéristiques Phase 2 :** +- Mega-buffer unique pour tous les 
quads de tous les chunks +- Vertex pulling via `SV_VertexID` + push constants (`b999`) +- Frustum culling CPU (wi::primitive::Frustum) +- Backface culling par face group (6 directions × chunk) +- GPU frustum cull compute shader (prêt, activation via flag) +- GPU mesher compute shader baseline (binary, sans greedy merge) +- Tri des quads par direction de face dans le mesher CPU +- GPU timestamp queries pour benchmark + +## Phases de développement + +- [x] **Phase 1** — Setup, meshing CPU, rendu basique +- [x] **Phase 2** — GPU-driven pipeline, mega-buffer, culling, compute shaders +- [ ] **Phase 3** — Texture blending (triplanar, height-based) +- [ ] **Phase 4** — Toping (rebords, bordures procédurales) +- [ ] **Phase 5** — Rendu smooth (Surface Nets / Marching Cubes) +- [ ] **Phase 6** — Ray tracing hybride (RT shadows + AO) + +## Licence + +Wicked Engine est sous licence MIT. Le code spécifique BVLE est propriétaire. diff --git a/shaders/voxelCommon.hlsli b/shaders/voxelCommon.hlsli new file mode 100644 index 0000000..367d230 --- /dev/null +++ b/shaders/voxelCommon.hlsli @@ -0,0 +1,99 @@ +// BVLE Voxels - Shared shader definitions +// Root signature, common structures, and constant buffers for voxel shaders. 
+ +#ifndef VOXEL_COMMON_HLSLI +#define VOXEL_COMMON_HLSLI + +// Wicked Engine DX12 root signature (HLSL 6.6+ bindless model) +// b999: push constants (12 x uint32 = 48 bytes) +// b0-b2: root CBV descriptors +// t0-t15, u0-u15: SRV/UAV descriptor table +// s0-s7: dynamic samplers +// s100+: static samplers +#define VOXEL_ROOTSIG \ + "RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | SAMPLER_HEAP_DIRECTLY_INDEXED), " \ + "RootConstants(num32BitConstants=12, b999), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "CBV(b2), " \ + "DescriptorTable( " \ + "CBV(b3, numDescriptors = 11, flags = DATA_STATIC_WHILE_SET_AT_EXECUTE)," \ + "SRV(t0, numDescriptors = 16, flags = DESCRIPTORS_VOLATILE | DATA_STATIC_WHILE_SET_AT_EXECUTE)," \ + "UAV(u0, numDescriptors = 16, flags = DESCRIPTORS_VOLATILE | DATA_STATIC_WHILE_SET_AT_EXECUTE)" \ + ")," \ + "DescriptorTable( " \ + "Sampler(s0, offset = 0, numDescriptors = 8, flags = DESCRIPTORS_VOLATILE)" \ + ")," \ + "StaticSampler(s100, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, filter = FILTER_MIN_MAG_MIP_LINEAR)," \ + "StaticSampler(s101, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, addressW = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_MIP_LINEAR)," \ + "StaticSampler(s102, addressU = TEXTURE_ADDRESS_MIRROR, addressV = TEXTURE_ADDRESS_MIRROR, addressW = TEXTURE_ADDRESS_MIRROR, filter = FILTER_MIN_MAG_MIP_LINEAR)," \ + "StaticSampler(s103, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, filter = FILTER_MIN_MAG_MIP_POINT)," \ + "StaticSampler(s104, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, addressW = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_MIP_POINT)," \ + "StaticSampler(s105, addressU = TEXTURE_ADDRESS_MIRROR, addressV = TEXTURE_ADDRESS_MIRROR, addressW = TEXTURE_ADDRESS_MIRROR, filter = FILTER_MIN_MAG_MIP_POINT)," \ + "StaticSampler(s106, addressU = TEXTURE_ADDRESS_CLAMP, addressV = 
TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, filter = FILTER_ANISOTROPIC, maxAnisotropy = 16)," \ + "StaticSampler(s107, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, addressW = TEXTURE_ADDRESS_WRAP, filter = FILTER_ANISOTROPIC, maxAnisotropy = 16)," \ + "StaticSampler(s108, addressU = TEXTURE_ADDRESS_MIRROR, addressV = TEXTURE_ADDRESS_MIRROR, addressW = TEXTURE_ADDRESS_MIRROR, filter = FILTER_ANISOTROPIC, maxAnisotropy = 16)," \ + "StaticSampler(s109, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, filter = FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT, comparisonFunc = COMPARISON_GREATER_EQUAL)," + +// ── Per-frame constant buffer (b0) ────────────────────────────── +cbuffer VoxelCB : register(b0) { + float4x4 viewProjection; + float4 cameraPosition; + float4 sunDirection; + float4 sunColor; + float chunkSize; + float textureTiling; + float2 _pad; + // Frustum culling data (used by cull compute shader) + float4 frustumPlanes[6]; // ax+by+cz+d=0, xyz=normal, w=distance + uint chunkCount; + uint _cullPad0; + uint _cullPad1; + uint _cullPad2; +}; + +// ── Indirect draw args (must match C++ IndirectDrawArgs / DX12 DrawInstanced) ── +struct IndirectDrawArgsInstanced { + uint vertexCountPerInstance; + uint instanceCount; + uint startVertexLocation; + uint startInstanceLocation; +}; + +// ── GPU chunk info (must match C++ GPUChunkInfo, 80 bytes) ────── +// NOTE: No arrays — scalar-only to guarantee C-style packing in StructuredBuffer. 
+struct GPUChunkInfo { + float4 worldPos; // xyz = chunk origin in world space, w = debug flag + uint quadOffset; // offset into mega quad buffer (in quads) + uint quadCount; // number of quads for this chunk + uint _pad0; + uint _pad1; + // Per-face data (6 faces: +X -X +Y -Y +Z -Z) + uint faceOff0, faceOff1, faceOff2, faceOff3, faceOff4, faceOff5; + uint faceCnt0, faceCnt1, faceCnt2, faceCnt3, faceCnt4, faceCnt5; +}; + +// Helper functions to access scalar face fields by index +uint getFaceOffset(GPUChunkInfo info, uint f) { + switch (f) { + case 0: return info.faceOff0; + case 1: return info.faceOff1; + case 2: return info.faceOff2; + case 3: return info.faceOff3; + case 4: return info.faceOff4; + default: return info.faceOff5; + } +} + +uint getFaceCount(GPUChunkInfo info, uint f) { + switch (f) { + case 0: return info.faceCnt0; + case 1: return info.faceCnt1; + case 2: return info.faceCnt2; + case 3: return info.faceCnt3; + case 4: return info.faceCnt4; + default: return info.faceCnt5; + } +} + +#endif // VOXEL_COMMON_HLSLI diff --git a/shaders/voxelCullCS.hlsl b/shaders/voxelCullCS.hlsl new file mode 100644 index 0000000..075582d --- /dev/null +++ b/shaders/voxelCullCS.hlsl @@ -0,0 +1,93 @@ +// BVLE Voxels - Frustum + Backface Culling Compute Shader +// 1 thread per chunk: tests AABB vs 6 frustum planes, then emits up to 6 draws +// (one per visible face group, back-facing groups are culled). + +#include "voxelCommon.hlsli" + +StructuredBuffer<GPUChunkInfo> chunkInfoBuffer : register(t2); +RWStructuredBuffer<IndirectDrawArgsInstanced> indirectArgs : register(u0); +RWByteAddressBuffer drawCount : register(u1); + +// Test AABB against 6 frustum planes (returns true if visible) +bool frustumTestAABB(float3 aabbMin, float3 aabbMax) +{ + [unroll] + for (uint i = 0; i < 6; i++) + { + float4 plane = frustumPlanes[i]; + float3 pVertex; + pVertex.x = (plane.x >= 0.0) ? aabbMax.x : aabbMin.x; + pVertex.y = (plane.y >= 0.0) ? aabbMax.y : aabbMin.y; + pVertex.z = (plane.z >= 0.0) ? 
aabbMax.z : aabbMin.z; + + if (dot(plane.xyz, pVertex) + plane.w < 0.0) + return false; + } + return true; +} + +// Face normals: +X, -X, +Y, -Y, +Z, -Z +static const float3 faceNormals[6] = { + float3( 1, 0, 0), float3(-1, 0, 0), + float3( 0, 1, 0), float3( 0,-1, 0), + float3( 0, 0, 1), float3( 0, 0,-1) +}; + +[RootSignature(VOXEL_ROOTSIG)] +[numthreads(64, 1, 1)] +void main(uint3 DTid : SV_DispatchThreadID) +{ + uint chunkIdx = DTid.x; + if (chunkIdx >= chunkCount) return; + + GPUChunkInfo info = chunkInfoBuffer[chunkIdx]; + if (info.quadCount == 0) return; + + float3 aabbMin = info.worldPos.xyz; + float3 aabbMax = aabbMin + (float3)chunkSize; + + if (!frustumTestAABB(aabbMin, aabbMax)) return; + + // Camera-to-chunk vector for backface test + float3 chunkCenter = (aabbMin + aabbMax) * 0.5; + float3 viewDir = chunkCenter - cameraPosition.xyz; + + // Emit one draw per visible face group + [unroll] + for (uint f = 0; f < 6; f++) + { + uint fCnt = getFaceCount(info, f); + if (fCnt == 0) continue; + + // Backface cull: if camera sees the back of this face group, skip it. + // A face group with normal N is back-facing if dot(viewDir, N) > 0. + // But we need a per-face test relative to the chunk AABB, not just center: + // face +X: back-facing if camera.x < aabbMin.x (camera is on -X side) + // face -X: back-facing if camera.x > aabbMax.x (camera is on +X side) + // This is more conservative and correct than dot product with center. 
+ bool backFacing = false; + switch (f) + { + case 0: backFacing = (cameraPosition.x < aabbMin.x); break; // +X + case 1: backFacing = (cameraPosition.x > aabbMax.x); break; // -X + case 2: backFacing = (cameraPosition.y < aabbMin.y); break; // +Y + case 3: backFacing = (cameraPosition.y > aabbMax.y); break; // -Y + case 4: backFacing = (cameraPosition.z < aabbMin.z); break; // +Z + case 5: backFacing = (cameraPosition.z > aabbMax.z); break; // -Z + } + if (backFacing) continue; + + uint drawIdx; + drawCount.InterlockedAdd(0, 1, drawIdx); + + // The face group's quads start at (chunk's mega-buffer offset + face offset within chunk) + uint faceQuadOffset = info.quadOffset + getFaceOffset(info, f); + + IndirectDrawArgsInstanced args; + args.vertexCountPerInstance = fCnt * 6; + args.instanceCount = 1; + args.startVertexLocation = faceQuadOffset * 6; + args.startInstanceLocation = chunkIdx; + indirectArgs[drawIdx] = args; + } +} diff --git a/shaders/voxelMeshCS.hlsl b/shaders/voxelMeshCS.hlsl new file mode 100644 index 0000000..12aad1a --- /dev/null +++ b/shaders/voxelMeshCS.hlsl @@ -0,0 +1,86 @@ +// BVLE Voxels - GPU Compute Mesher (Binary Face Culling only) +// 1 thread per voxel: checks 6 neighbors, emits 1x1 PackedQuad per visible face. +// No greedy merge — this is the simple GPU baseline for benchmark comparison. 
+ +#include "voxelCommon.hlsli" + +// Push constants: chunk index + output offset +struct MeshPush { + uint chunkIndex; // which chunk to mesh + uint voxelBufferOffset; // offset into the voxel data buffer (in uint16 pairs) + uint quadBufferOffset; // offset into the output quad buffer (in quads) + uint maxOutputQuads; // safety cap on output + uint pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7; // pad to 48 bytes (12 x uint32); scalars, not an array: cbuffer packing gives each array element its own 16-byte register +}; +[[vk::push_constant]] ConstantBuffer<MeshPush> push : register(b999); + +// Input: voxel data for one chunk (32^3 = 32768 voxels, packed as uint16 pairs in uint) +// Each uint holds 2 voxels: low 16 bits = voxel A, high 16 bits = voxel B +StructuredBuffer<uint> voxelData : register(t0); + +// Output: packed quads (append buffer with atomic counter) +RWStructuredBuffer<uint2> outputQuads : register(u0); // uint2 = 8 bytes = PackedQuad +RWByteAddressBuffer quadCounter : register(u1); // atomic counter + +// Constants +static const uint CSIZE = 32; +static const uint CVOL = CSIZE * CSIZE * CSIZE; // 32768 + +// Read a single voxel (16-bit) from the packed buffer +uint readVoxel(uint flatIndex) { + uint pairIndex = flatIndex >> 1; // which uint (2 voxels per uint) + uint shift = (flatIndex & 1) * 16; // 0 or 16 + return (voxelData[push.voxelBufferOffset + pairIndex] >> shift) & 0xFFFF; +} + +// Check if neighbor is air (handles out-of-bounds as air for chunk boundaries) +bool isNeighborAir(int3 pos, int3 dir) { + int3 n = pos + dir; + // Out-of-chunk = treat as air (boundary faces always visible) + if (any(n < 0) || any(n >= (int3)CSIZE)) + return true; + uint flatN = (uint)n.x + (uint)n.y * CSIZE + (uint)n.z * CSIZE * CSIZE; + return readVoxel(flatN) == 0; // materialID 0 = air +} + +// Pack a quad into uint2 (matches CPU PackedQuad format) +uint2 packQuad(uint x, uint y, uint z, uint w, uint h, uint face, uint matID) { + uint lo = x | (y << 6) | (z << 12) | (w << 18) | (h << 24) | (face << 30); + uint hi = (face >> 2) | (matID << 1) | (0 << 9) | (0 << 17); // AO=0, flags=0 + return uint2(lo, hi); +} 
+
+// Face directions
+static const int3 faceDirs[6] = {
+    int3( 1, 0, 0), int3(-1, 0, 0),
+    int3( 0, 1, 0), int3( 0,-1, 0),
+    int3( 0, 0, 1), int3( 0, 0,-1)
+};
+
+[RootSignature(VOXEL_ROOTSIG)]
+[numthreads(8, 8, 8)] // 512 threads per group; 32^3 voxels need 64 such groups (4 x 4 x 4)
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+    if (any(DTid >= CSIZE)) return;
+
+    uint flatIdx = DTid.x + DTid.y * CSIZE + DTid.z * CSIZE * CSIZE;
+    uint voxel = readVoxel(flatIdx);
+    if (voxel == 0) return; // air voxel, nothing to emit
+
+    uint matID = voxel >> 8; // high 8 bits = material ID
+
+    // Check each face direction (one 1x1 quad per exposed face, no greedy merging)
+    [unroll]
+    for (uint f = 0; f < 6; f++) {
+        if (!isNeighborAir((int3)DTid, faceDirs[f])) continue;
+
+        // Emit a 1x1 quad
+        uint slot;
+        quadCounter.InterlockedAdd(0, 1, slot);
+        if (slot >= push.maxOutputQuads) return; // overflow guard (the counter itself keeps counting past the cap)
+
+        outputQuads[push.quadBufferOffset + slot] = packQuad(
+            DTid.x, DTid.y, DTid.z, 1, 1, f, matID
+        );
+    }
+}
diff --git a/shaders/voxelPS.hlsl b/shaders/voxelPS.hlsl
new file mode 100644
index 0000000..6605e4d
--- /dev/null
+++ b/shaders/voxelPS.hlsl
@@ -0,0 +1,83 @@
+// BVLE Voxels - Pixel Shader (Triplanar textured with simple lighting)
+
+#include "voxelCommon.hlsli"
+
+Texture2DArray materialTextures : register(t1);
+SamplerState materialSampler : register(s0);
+
+struct PSInput {
+    float4 position : SV_POSITION;
+    float3 worldPos : WORLDPOS;
+    float3 normal : NORMAL;
+    float2 uv : TEXCOORD0;
+    nointerpolation uint materialID : MATERIALID;
+    nointerpolation uint faceID : FACEID;
+    nointerpolation float debugFlag : DEBUGFLAG;
+    float ao : AO;
+};
+
+// Triplanar blend weights: |normal|^sharpness, normalized so the three weights sum to ~1
+float3 triplanarWeights(float3 normal, float sharpness) {
+    float3 w = abs(normal);
+    w = pow(w, (float3)sharpness);
+    return w / (w.x + w.y + w.z + 0.0001); // epsilon avoids division by zero
+}
+
+float3 sampleTriplanar(float3 worldPos, float3 normal, uint texIndex, float tiling) {
+    float3 w = triplanarWeights(normal, 4.0);
+
+    float3 colX = materialTextures.Sample(materialSampler, float3(worldPos.yz * tiling, (float)texIndex)).rgb;
+    float3 colY = materialTextures.Sample(materialSampler, float3(worldPos.xz * tiling, (float)texIndex)).rgb;
+    float3 colZ = materialTextures.Sample(materialSampler, float3(worldPos.xy * tiling, (float)texIndex)).rgb;
+
+    return colX * w.x + colY * w.y + colZ * w.z;
+}
+
+// Debug face colors
+static const float3 faceDebugColors[6] = {
+    float3(1.0, 0.2, 0.2), // 0: +X = RED
+    float3(0.5, 0.0, 0.0), // 1: -X = DARK RED
+    float3(0.2, 1.0, 0.2), // 2: +Y = GREEN
+    float3(0.0, 0.5, 0.0), // 3: -Y = DARK GREEN
+    float3(0.2, 0.2, 1.0), // 4: +Z = BLUE
+    float3(0.0, 0.0, 0.5), // 5: -Z = DARK BLUE
+};
+
+[RootSignature(VOXEL_ROOTSIG)]
+float4 main(PSInput input) : SV_TARGET0
+{
+    // ── DEBUG MODE: face direction colors ──
+    if (input.debugFlag > 0.5)
+    {
+        uint fid = min(input.faceID, 5u);
+        float3 faceColor = faceDebugColors[fid];
+        float2 checker = floor(input.worldPos.xz * 0.5);
+        float check = frac((checker.x + checker.y) * 0.5) * 2.0;
+        faceColor *= (0.85 + 0.15 * check);
+        return float4(faceColor, 1.0);
+    }
+
+    // ── NORMAL MODE: triplanar textured ──
+    float3 N = normalize(input.normal);
+    float3 L = normalize(-sunDirection.xyz);
+    float NdotL = max(dot(N, L), 0.0);
+
+    float3 baseColor = N * 0.5 + 0.5;
+
+    uint texIndex = clamp(input.materialID - 1u, 0u, 4u); // materialID 1..5 -> layer 0..4; ID 0 wraps and clamps to 4 but falls back to baseColor below
+    float tiling = textureTiling;
+    float3 texColor = sampleTriplanar(input.worldPos, N, texIndex, tiling);
+
+    float3 albedo = (input.materialID > 0u) ? texColor : baseColor;
+
+    float3 ambient = float3(0.15, 0.18, 0.25);
+    float3 diffuse = sunColor.rgb * NdotL;
+    float3 color = albedo * (ambient + diffuse) * input.ao;
+
+    float dist = length(input.worldPos - cameraPosition.xyz);
+    float fog = 1.0 - exp(-dist * 0.003);
+    float3 fogColor = float3(0.55, 0.70, 0.90);
+    color = lerp(color, fogColor, saturate(fog));
+
+    return float4(color, 1.0);
+}
diff --git a/shaders/voxelVS.hlsl b/shaders/voxelVS.hlsl
new file mode 100644
index 0000000..3a568e9
--- /dev/null
+++ b/shaders/voxelVS.hlsl
@@ -0,0 +1,127 @@
+// BVLE Voxels - Vertex Shader (Vertex Pulling from mega-buffer)
+// Phase 2: looks up chunk info through push constants (chunkIndex, quadOffset) rather than SV_InstanceID.
+
+#include "voxelCommon.hlsli"
+
+struct PackedQuad {
+    uint2 data; // 8 bytes = 2 x uint32
+};
+
+StructuredBuffer<PackedQuad> quadBuffer : register(t0);
+StructuredBuffer<GPUChunkInfo> chunkInfoBuffer : register(t2);
+
+// Push constants: chunk index + quad offset for current draw call
+struct VoxelPush {
+    uint chunkIndex;
+    uint quadOffset; // offset into mega quad buffer (in quads)
+    uint pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9;
+};
+[[vk::push_constant]] ConstantBuffer<VoxelPush> push : register(b999);
+
+struct VSOutput {
+    float4 position : SV_POSITION;
+    float3 worldPos : WORLDPOS;
+    float3 normal : NORMAL;
+    float2 uv : TEXCOORD0;
+    nointerpolation uint materialID : MATERIALID;
+    nointerpolation uint faceID : FACEID;
+    nointerpolation float debugFlag : DEBUGFLAG;
+    float ao : AO;
+};
+
+// Unpack 64 bits from 2 x uint32
+void unpackQuad(uint2 raw, out uint px, out uint py, out uint pz,
+                out uint w, out uint h, out uint face,
+                out uint matID, out uint ao)
+{
+    uint lo = raw.x;
+    uint hi = raw.y;
+    px = lo & 0x3F;
+    py = (lo >> 6) & 0x3F;
+    pz = (lo >> 12) & 0x3F;
+    w = (lo >> 18) & 0x3F;
+    h = (lo >> 24) & 0x3F;
+    face = ((lo >> 30) & 0x3) | ((hi & 0x1) << 2); // 3-bit face: low 2 bits in lo[30..31], high bit in hi[0]
+    matID = (hi >> 1) & 0xFF;
+    ao = (hi >> 9) & 0xFF;
+}
+
+// Face normals: +X, -X, +Y, -Y, +Z, -Z
+static const float3 faceNormals[6] = {
+    float3( 1, 0, 0), float3(-1, 0, 0),
+    float3( 0, 1, 0), float3( 0,-1, 0),
+    float3( 0, 0, 1), float3( 0, 0,-1)
+};
+
+// Face U/V tangent axes for quad expansion
+static const float3 faceU[6] = {
+    float3(0, 1, 0), float3(0, 1, 0),
+    float3(1, 0, 0), float3(1, 0, 0),
+    float3(1, 0, 0), float3(1, 0, 0)
+};
+
+static const float3 faceV[6] = {
+    float3(0, 0, 1), float3(0, 0, 1),
+    float3(0, 0, 1), float3(0, 0, 1),
+    float3(0, 1, 0), float3(0, 1, 0)
+};
+
+[RootSignature(VOXEL_ROOTSIG)]
+VSOutput main(uint vertexID : SV_VertexID)
+{
+    VSOutput output;
+
+    // Look up chunk info via push constant (SV_InstanceID doesn't include StartInstanceLocation in D3D12)
+    GPUChunkInfo info = chunkInfoBuffer[push.chunkIndex];
+
+    // 6 vertices per quad (2 triangles)
+    // Use push.quadOffset instead of relying on StartVertexLocation in SV_VertexID
+    uint localVertex = vertexID;
+    uint quadIndex = push.quadOffset + (localVertex / 6);
+    uint cornerIndex = localVertex % 6;
+
+    PackedQuad packed = quadBuffer[quadIndex];
+    uint px, py, pz, w, h, face, matID, ao;
+    unpackQuad(packed.data, px, py, pz, w, h, face, matID, ao);
+
+    // Corner offsets for 2 triangles (6 vertices per quad)
+    // cross(U,V) matches N for faces: +X(0), -Y(3), +Z(4) -> CW corners
+    // cross(U,V) opposes N for faces: -X(1), +Y(2), -Z(5) -> CCW corners
+    static const float2 cornersCW[6] = {
+        float2(0, 0), float2(0, 1), float2(1, 0),
+        float2(1, 0), float2(0, 1), float2(1, 1)
+    };
+    static const float2 cornersCCW[6] = {
+        float2(0, 0), float2(1, 0), float2(0, 1),
+        float2(0, 1), float2(1, 0), float2(1, 1)
+    };
+    bool useCCW = (face == 1 || face == 2 || face == 5);
+    float2 corner = useCCW ? cornersCCW[cornerIndex] : cornersCW[cornerIndex];
+
+    float3 basePos = float3((float)px, (float)py, (float)pz);
+    float3 normal = faceNormals[face];
+    float3 uAxis = faceU[face];
+    float3 vAxis = faceV[face];
+
+    // Positive faces: offset by 1 in normal direction
+    float3 faceOffset = (face % 2 == 0) ? normal : float3(0, 0, 0);
+
+    // Expand quad
+    float3 localPos = basePos + faceOffset + uAxis * corner.x * (float)w + vAxis * corner.y * (float)h;
+    float3 worldPos = localPos + info.worldPos.xyz;
+
+    output.position = mul(viewProjection, float4(worldPos, 1.0));
+    output.worldPos = worldPos;
+    output.normal = normal;
+    output.uv = corner * float2((float)w, (float)h) * textureTiling;
+    output.materialID = matID;
+    output.faceID = face;
+    output.debugFlag = info.worldPos.w; // w component of the chunk position carries the debug flag
+
+    // AO: 4 corners x 2 bits
+    uint aoCorner = min(cornerIndex, 3u); // NOTE(review): cornerIndex is the 0..5 vertex slot, so slots 3..5 all read corner 3 — confirm mapping once AO is populated
+    float aoValue = (float)((ao >> (aoCorner * 2u)) & 3u) / 3.0;
+    output.ao = 1.0 - aoValue * 0.4;
+
+    return output;
+}
diff --git a/src/app/main.cpp b/src/app/main.cpp
new file mode 100644
index 0000000..e217545
--- /dev/null
+++ b/src/app/main.cpp
@@ -0,0 +1,187 @@
+#include "WickedEngine.h"
+#include "voxel/VoxelRenderer.h"
+#include <fstream>
+#include <dbghelp.h>
+#pragma comment(lib, "dbghelp.lib")
+
+// ── BVLE Voxels - Prototype Application ─────────────────────────
+// Wicked Engine based voxel engine prototype for performance validation.
+
+// ── Crash handler: writes stack trace + minidump on unhandled exception ──
+static LONG WINAPI CrashHandler(EXCEPTION_POINTERS* ep) {
+    std::ofstream crash("bvle_crash.log", std::ios::trunc);
+    crash << "=== BVLE CRASH REPORT ===" << std::endl;
+
+    DWORD code = ep->ExceptionRecord->ExceptionCode;
+    PVOID addr = ep->ExceptionRecord->ExceptionAddress;
+    crash << "Exception code: 0x" << std::hex << code << std::endl;
+    crash << "Crash address: 0x" << addr << std::endl;
+
+    if (code == EXCEPTION_ACCESS_VIOLATION && ep->ExceptionRecord->NumberParameters >= 2) {
+        ULONG_PTR type = ep->ExceptionRecord->ExceptionInformation[0];
+        ULONG_PTR target = ep->ExceptionRecord->ExceptionInformation[1];
+        crash << (type == 0 ? "Reading" : "Writing") << " address: 0x" << std::hex << target << std::endl;
+    }
+
+    HANDLE process = GetCurrentProcess();
+    HANDLE thread = GetCurrentThread();
+    SymInitialize(process, NULL, TRUE);
+
+    CONTEXT* ctx = ep->ContextRecord;
+    STACKFRAME64 frame = {};
+    frame.AddrPC.Offset = ctx->Rip; frame.AddrPC.Mode = AddrModeFlat;
+    frame.AddrFrame.Offset = ctx->Rbp; frame.AddrFrame.Mode = AddrModeFlat;
+    frame.AddrStack.Offset = ctx->Rsp; frame.AddrStack.Mode = AddrModeFlat;
+
+    crash << "\nStack trace:" << std::endl;
+    for (int i = 0; i < 32; i++) {
+        if (!StackWalk64(IMAGE_FILE_MACHINE_AMD64, process, thread, &frame,
+                         ctx, NULL, SymFunctionTableAccess64, SymGetModuleBase64, NULL))
+            break;
+        char symbolBuf[sizeof(SYMBOL_INFO) + 256];
+        SYMBOL_INFO* symbol = (SYMBOL_INFO*)symbolBuf;
+        symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
+        symbol->MaxNameLen = 255;
+        DWORD64 disp64 = 0;
+        crash << " [" << i << "] 0x" << std::hex << frame.AddrPC.Offset;
+        if (SymFromAddr(process, frame.AddrPC.Offset, &disp64, symbol))
+            crash << " " << symbol->Name << " +0x" << disp64;
+        IMAGEHLP_LINE64 line = {}; line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
+        DWORD disp32 = 0;
+        if (SymGetLineFromAddr64(process, frame.AddrPC.Offset, &disp32, &line))
+            crash << " (" << line.FileName << ":" << std::dec << line.LineNumber << ")";
+        crash << std::endl;
+    }
+
+    HANDLE dumpFile = CreateFileA("bvle_crash.dmp", GENERIC_WRITE, 0, NULL,
+                                  CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (dumpFile != INVALID_HANDLE_VALUE) {
+        MINIDUMP_EXCEPTION_INFORMATION mei;
+        mei.ThreadId = GetCurrentThreadId();
+        mei.ExceptionPointers = ep;
+        mei.ClientPointers = FALSE;
+        MiniDumpWriteDump(process, GetCurrentProcessId(), dumpFile,
+                          MiniDumpWithDataSegs, &mei, NULL, NULL);
+        CloseHandle(dumpFile);
+    }
+
+    crash.close();
+    SymCleanup(process);
+    return EXCEPTION_EXECUTE_HANDLER;
+}
+
+static wi::Application application;
+static voxel::VoxelRenderPath renderPath;
+
+int APIENTRY wWinMain(
+    _In_ HINSTANCE hInstance,
+    _In_opt_ HINSTANCE hPrevInstance,
+    _In_ LPWSTR lpCmdLine,
+    _In_ int nCmdShow)
+{
+    SetUnhandledExceptionFilter(CrashHandler);
+
+    // Win32 window setup
+    static auto WndProc = [](HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) -> LRESULT
+    {
+        switch (message)
+        {
+        case WM_SIZE:
+        case WM_DPICHANGED:
+            if (application.is_window_active)
+                application.SetWindow(hWnd);
+            break;
+        case WM_CHAR:
+            switch (wParam)
+            {
+            case VK_BACK:
+                wi::gui::TextInputField::DeleteFromInput();
+                break;
+            case VK_RETURN:
+                break;
+            default:
+                wi::gui::TextInputField::AddInput((const wchar_t)wParam);
+                break;
+            }
+            break;
+        case WM_INPUT:
+            wi::input::rawinput::ParseMessage((void*)lParam);
+            break;
+        case WM_KILLFOCUS:
+            application.is_window_active = false;
+            break;
+        case WM_SETFOCUS:
+            application.is_window_active = true;
+            break;
+        case WM_DESTROY:
+            PostQuitMessage(0);
+            break;
+        default:
+            return DefWindowProc(hWnd, message, wParam, lParam);
+        }
+        return 0;
+    };
+
+    SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2);
+
+    WNDCLASSEXW wcex = {};
+    wcex.cbSize = sizeof(WNDCLASSEX);
+    wcex.style = CS_HREDRAW | CS_VREDRAW;
+    wcex.lpfnWndProc = WndProc;
+    wcex.hInstance = hInstance;
+    wcex.hCursor = LoadCursor(nullptr, IDC_ARROW);
+    wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW + 1);
+    wcex.lpszClassName = L"BVLEVoxels";
+    RegisterClassExW(&wcex);
+
+    HWND hWnd = CreateWindowW(
+        wcex.lpszClassName,
+        L"BVLE Voxels - Prototype",
+        WS_OVERLAPPEDWINDOW,
+        CW_USEDEFAULT, 0,
+        1920, 1080,
+        nullptr, nullptr, hInstance, nullptr
+    );
+    ShowWindow(hWnd, SW_SHOWMAXIMIZED);
+
+    // Initialize Wicked Engine (selects DX12 by default on Windows, Vulkan on Linux)
+    // Pass "vulkan" as command line argument to force Vulkan backend
+    // Pass "debugdevice" for D3D debug layer, "gpuvalidation" for GPU-based validation
+    application.SetWindow(hWnd);
+    wi::arguments::Parse(lpCmdLine);
+
+
+    // Redirect Wicked Engine log to file
+    wi::backlog::SetLogFile("bvle_backlog.txt");
+
+    // Info display
+    application.infoDisplay.active = true;
+    application.infoDisplay.watermark = false;
+    application.infoDisplay.resolution = true;
+    application.infoDisplay.fpsinfo = true;
+    application.infoDisplay.heap_allocation_counter = true;
+
+    // Check for "debug" argument to enable face-color debug mode
+    if (wi::arguments::HasArgument("debug")) {
+        renderPath.debugMode = true;
+    }
+
+    // Activate our custom voxel render path
+    application.ActivatePath(&renderPath);
+
+    // Main loop: drain pending window messages first, run an engine frame when the queue is empty
+    MSG msg = { 0 };
+    while (msg.message != WM_QUIT)
+    {
+        if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) {
+            TranslateMessage(&msg);
+            DispatchMessage(&msg);
+        }
+        else {
+            application.Run();
+        }
+    }
+
+    wi::jobsystem::ShutDown();
+    return (int)msg.wParam;
+}
diff --git a/src/voxel/VoxelMesher.cpp b/src/voxel/VoxelMesher.cpp
new file mode 100644
index 0000000..b082ee3
--- /dev/null
+++ b/src/voxel/VoxelMesher.cpp
@@ -0,0 +1,238 @@
+#include "VoxelMesher.h"
+#include <cstring> // std::memset — NOTE(review): header name was lost in extraction, restored from usage; confirm
+#include <vector>  // std::vector — NOTE(review): header name was lost in extraction, restored from usage; confirm
+
+namespace voxel {
+
+// ── Build binary masks per axis ─────────────────────────────────
+// For each axis, solid[u][v] is a 32-bit mask where bit i=1 means
+// voxel at position i along that axis is solid.
+void VoxelMesher::buildAxisMasks(const Chunk& chunk, AxisMasks masks[3]) {
+    std::memset(masks, 0, sizeof(AxisMasks) * 3); // start from all-empty columns; bits are OR-ed in below
+
+    for (int z = 0; z < CHUNK_SIZE; z++) {
+        for (int y = 0; y < CHUNK_SIZE; y++) {
+            for (int x = 0; x < CHUNK_SIZE; x++) {
+                if (!chunk.at(x, y, z).isEmpty()) {
+                    // X-axis: march along X, indexed by [Y][Z]
+                    masks[0].solid[y][z] |= (1u << x);
+                    // Y-axis: march along Y, indexed by [X][Z]
+                    masks[1].solid[x][z] |= (1u << y);
+                    // Z-axis: march along Z, indexed by [X][Y]
+                    masks[2].solid[x][y] |= (1u << z);
+                }
+            }
+        }
+    }
+}
+
+// Helper: get voxel at chunk-local (x, y, z); out-of-chunk coordinates are resolved through the world (neighbor chunks) for boundary faces
+static VoxelData getVoxelSafe(const Chunk& chunk, const VoxelWorld& world, int x, int y, int z) {
+    if (chunk.isInBounds(x, y, z)) {
+        return chunk.at(x, y, z);
+    }
+    // Cross-chunk lookup: convert local coords to world coords (chunk origin = chunk.pos * CHUNK_SIZE)
+    int wx = chunk.pos.x * CHUNK_SIZE + x;
+    int wy = chunk.pos.y * CHUNK_SIZE + y;
+    int wz = chunk.pos.z * CHUNK_SIZE + z;
+    return world.getVoxel(wx, wy, wz);
+}
+
+// ── Greedy Merge ────────────────────────────────────────────────
+// For a given face direction, merge visible faces of the same material
+// into maximal rectangular quads.
+void VoxelMesher::greedyMerge( + const Chunk& chunk, + const VoxelWorld& world, + uint8_t face, + const uint32_t faceMasks[CHUNK_SIZE][CHUNK_SIZE], + std::vector& outQuads +) { + // Determine axis mapping based on face + // face 0,1 = X axis -> iterate over Y,Z slices + // face 2,3 = Y axis -> iterate over X,Z slices + // face 4,5 = Z axis -> iterate over X,Y slices + + // For each slice along the face normal axis + for (int depth = 0; depth < CHUNK_SIZE; depth++) { + // Build a 2D grid of material IDs for this slice + uint8_t matGrid[CHUNK_SIZE][CHUNK_SIZE]; + bool visited[CHUNK_SIZE][CHUNK_SIZE]; + std::memset(matGrid, 0, sizeof(matGrid)); + std::memset(visited, 0, sizeof(visited)); + + int faceCount = 0; + for (int v = 0; v < CHUNK_SIZE; v++) { + for (int u = 0; u < CHUNK_SIZE; u++) { + // Check if this face is visible at this depth + bool faceVisible = false; + + int x, y, z; + switch (face) { + case FACE_POS_X: x = depth; y = u; z = v; faceVisible = (faceMasks[u][v] >> depth) & 1; break; + case FACE_NEG_X: x = depth; y = u; z = v; faceVisible = (faceMasks[u][v] >> depth) & 1; break; + case FACE_POS_Y: y = depth; x = u; z = v; faceVisible = (faceMasks[u][v] >> depth) & 1; break; + case FACE_NEG_Y: y = depth; x = u; z = v; faceVisible = (faceMasks[u][v] >> depth) & 1; break; + case FACE_POS_Z: z = depth; x = u; y = v; faceVisible = (faceMasks[u][v] >> depth) & 1; break; + case FACE_NEG_Z: z = depth; x = u; y = v; faceVisible = (faceMasks[u][v] >> depth) & 1; break; + } + + if (faceVisible) { + matGrid[v][u] = chunk.at(x, y, z).getMaterialID(); + faceCount++; + } + } + } + + if (faceCount == 0) continue; + + // Greedy merge: scan row by row, merge same-material quads + for (int v = 0; v < CHUNK_SIZE; v++) { + for (int u = 0; u < CHUNK_SIZE; u++) { + if (visited[v][u] || matGrid[v][u] == 0) continue; + + uint8_t mat = matGrid[v][u]; + + // Expand width (along u) + int w = 1; + while (u + w < CHUNK_SIZE && !visited[v][u + w] && matGrid[v][u + w] == mat) { + w++; + 
} + + // Expand height (along v) + int h = 1; + bool canExpand = true; + while (v + h < CHUNK_SIZE && canExpand) { + for (int du = 0; du < w; du++) { + if (visited[v + h][u + du] || matGrid[v + h][u + du] != mat) { + canExpand = false; + break; + } + } + if (canExpand) h++; + } + + // Mark as visited + for (int dv = 0; dv < h; dv++) { + for (int du = 0; du < w; du++) { + visited[v + dv][u + du] = true; + } + } + + // Compute the actual position in chunk-local coords + uint8_t px, py, pz; + switch (face) { + case FACE_POS_X: px = (uint8_t)depth; py = (uint8_t)u; pz = (uint8_t)v; break; + case FACE_NEG_X: px = (uint8_t)depth; py = (uint8_t)u; pz = (uint8_t)v; break; + case FACE_POS_Y: px = (uint8_t)u; py = (uint8_t)depth; pz = (uint8_t)v; break; + case FACE_NEG_Y: px = (uint8_t)u; py = (uint8_t)depth; pz = (uint8_t)v; break; + case FACE_POS_Z: px = (uint8_t)u; py = (uint8_t)v; pz = (uint8_t)depth; break; + case FACE_NEG_Z: px = (uint8_t)u; py = (uint8_t)v; pz = (uint8_t)depth; break; + default: px = py = pz = 0; break; + } + + // Width/height in the quad's local UV space + uint8_t qw, qh; + switch (face) { + case FACE_POS_X: case FACE_NEG_X: qw = (uint8_t)w; qh = (uint8_t)h; break; + case FACE_POS_Y: case FACE_NEG_Y: qw = (uint8_t)w; qh = (uint8_t)h; break; + case FACE_POS_Z: case FACE_NEG_Z: qw = (uint8_t)w; qh = (uint8_t)h; break; + default: qw = qh = 1; break; + } + + outQuads.push_back(PackedQuad::create( + px, py, pz, qw, qh, face, mat, 0, 0 + )); + } + } + } +} + +uint32_t VoxelMesher::meshChunk(Chunk& chunk, const VoxelWorld& world) { + chunk.quads.clear(); + + // Step 1: Build binary solid masks per axis + AxisMasks axisMasks[3]; + buildAxisMasks(chunk, axisMasks); + + // Step 2: For each face direction, compute visible face masks + // then do greedy merge + for (uint8_t face = 0; face < FACE_COUNT; face++) { + int axis = face / 2; // 0=X, 1=Y, 2=Z + bool positive = (face % 2 == 0); + + // Compute visible face masks + // A face is visible if the voxel is 
solid and the neighbor in the face direction is air + uint32_t faceMasks[CHUNK_SIZE][CHUNK_SIZE]; + std::memset(faceMasks, 0, sizeof(faceMasks)); + + for (int v = 0; v < CHUNK_SIZE; v++) { + for (int u = 0; u < CHUNK_SIZE; u++) { + uint32_t solid = axisMasks[axis].solid[u][v]; + if (solid == 0) continue; + + uint32_t visible; + if (positive) { + // +dir: face visible if solid here and NOT solid at pos+1 + // Shift right: neighbor is at bit+1 + uint32_t neighbor = (solid >> 1); + // The highest bit has no neighbor in this chunk - check boundary + visible = solid & ~neighbor; + // Bit 31 (chunk boundary): need to check neighbor chunk + // For now, always show boundary faces + if (solid & (1u << (CHUNK_SIZE - 1))) { + // Check if neighbor chunk's voxel is empty + int nx, ny, nz; + switch (axis) { + case 0: nx = CHUNK_SIZE; ny = u; nz = v; break; // X+ + case 1: nx = u; ny = CHUNK_SIZE; nz = v; break; // Y+ + case 2: nx = u; ny = v; nz = CHUNK_SIZE; break; // Z+ + default: nx = ny = nz = 0; + } + if (!getVoxelSafe(chunk, world, nx, ny, nz).isEmpty()) { + visible &= ~(1u << (CHUNK_SIZE - 1)); // hide boundary face + } + } + } else { + // -dir: face visible if solid here and NOT solid at pos-1 + uint32_t neighbor = (solid << 1); + visible = solid & ~neighbor; + // Bit 0 (chunk boundary) + if (solid & 1u) { + int nx, ny, nz; + switch (axis) { + case 0: nx = -1; ny = u; nz = v; break; + case 1: nx = u; ny = -1; nz = v; break; + case 2: nx = u; ny = v; nz = -1; break; + default: nx = ny = nz = 0; + } + if (!getVoxelSafe(chunk, world, nx, ny, nz).isEmpty()) { + visible &= ~1u; // hide boundary face + } + } + } + + faceMasks[u][v] = visible; + } + } + + uint32_t beforeCount = (uint32_t)chunk.quads.size(); + greedyMerge(chunk, world, face, faceMasks, chunk.quads); + chunk.faceOffsets[face] = beforeCount; + chunk.faceCounts[face] = (uint32_t)chunk.quads.size() - beforeCount; + } + + chunk.quadCount = (uint32_t)chunk.quads.size(); + chunk.dirty = false; + return chunk.quadCount; 
+} + +uint8_t VoxelMesher::calcAO(const VoxelWorld& world, const ChunkPos& cpos, + int x, int y, int z, uint8_t face) { + // Simplified AO: count occluding neighbors around the vertex + // Returns packed 4x2-bit AO for the 4 corners + // TODO: implement proper per-corner AO + return 0; +} + +} // namespace voxel diff --git a/src/voxel/VoxelMesher.h b/src/voxel/VoxelMesher.h new file mode 100644 index 0000000..49f329f --- /dev/null +++ b/src/voxel/VoxelMesher.h @@ -0,0 +1,40 @@ +#pragma once +#include "VoxelTypes.h" +#include "VoxelWorld.h" + +namespace voxel { + +// ── Binary Greedy Mesher (CPU implementation, port of cgerikj) ── +// Generates PackedQuad list for a chunk using binary greedy meshing. +// For each axis, uses bitmask operations to find visible faces, +// then greedily merges same-material quads. +class VoxelMesher { +public: + // Mesh a single chunk, populating chunk.quads + // Returns number of quads generated + static uint32_t meshChunk(Chunk& chunk, const VoxelWorld& world); + +private: + // Per-axis binary face culling + // col_masks[axis][u][v] = 32-bit mask of solid voxels along axis + struct AxisMasks { + uint32_t solid[CHUNK_SIZE][CHUNK_SIZE]; // solid[u][v] = bitmask along axis + }; + + static void buildAxisMasks(const Chunk& chunk, AxisMasks masks[3]); + + // Greedy merge faces of same material into larger quads + static void greedyMerge( + const Chunk& chunk, + const VoxelWorld& world, + uint8_t face, + const uint32_t faceMasks[CHUNK_SIZE][CHUNK_SIZE], + std::vector& outQuads + ); + + // Calculate ambient occlusion for quad corners + static uint8_t calcAO(const VoxelWorld& world, const ChunkPos& cpos, + int x, int y, int z, uint8_t face); +}; + +} // namespace voxel diff --git a/src/voxel/VoxelRenderer.cpp b/src/voxel/VoxelRenderer.cpp new file mode 100644 index 0000000..890a12c --- /dev/null +++ b/src/voxel/VoxelRenderer.cpp @@ -0,0 +1,597 @@ +#include "VoxelRenderer.h" +#include "wiPrimitive.h" +#include +#include + +using namespace 
wi::graphics; + +namespace voxel { + +// ── VoxelRenderer Implementation ──────────────────────────────── + +VoxelRenderer::VoxelRenderer() = default; +VoxelRenderer::~VoxelRenderer() { shutdown(); } + +void VoxelRenderer::initialize(GraphicsDevice* dev) { + device_ = dev; + if (!device_) return; + + createPipeline(); + if (!pso_.IsValid()) { + wi::backlog::post("VoxelRenderer: pipeline creation failed", wi::backlog::LogLevel::Error); + initialized_ = false; + return; + } + generateTextures(); + + // Create mega quad buffer (SRV for vertex pulling) + GPUBufferDesc megaDesc; + megaDesc.size = MEGA_BUFFER_CAPACITY * sizeof(PackedQuad); + megaDesc.bind_flags = BindFlag::SHADER_RESOURCE; + megaDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED; + megaDesc.stride = sizeof(PackedQuad); + megaDesc.usage = Usage::DEFAULT; + device_->CreateBuffer(&megaDesc, nullptr, &megaQuadBuffer_); + + // Create chunk info buffer (SRV for VS chunk lookup) + GPUBufferDesc infoDesc; + infoDesc.size = MAX_CHUNKS * sizeof(GPUChunkInfo); + infoDesc.bind_flags = BindFlag::SHADER_RESOURCE; + infoDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED; + infoDesc.stride = sizeof(GPUChunkInfo); + infoDesc.usage = Usage::DEFAULT; + device_->CreateBuffer(&infoDesc, nullptr, &chunkInfoBuffer_); + + // Create indirect args buffer (for DrawInstancedIndirectCount, up to 6 draws per chunk) + // UAV bind flag needed for GPU cull compute shader to write args + GPUBufferDesc argsDesc; + argsDesc.size = MAX_DRAWS * sizeof(IndirectDrawArgs); + argsDesc.bind_flags = BindFlag::UNORDERED_ACCESS; + argsDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED | ResourceMiscFlag::INDIRECT_ARGS; + argsDesc.stride = sizeof(IndirectDrawArgs); + argsDesc.usage = Usage::DEFAULT; + device_->CreateBuffer(&argsDesc, nullptr, &indirectArgsBuffer_); + + // Create draw count buffer (single uint32, raw for RWByteAddressBuffer) + // UAV bind flag needed for GPU cull compute shader atomic counter + GPUBufferDesc countDesc; + 
countDesc.size = sizeof(uint32_t); + countDesc.bind_flags = BindFlag::UNORDERED_ACCESS; + countDesc.misc_flags = ResourceMiscFlag::BUFFER_RAW | ResourceMiscFlag::INDIRECT_ARGS; + countDesc.usage = Usage::DEFAULT; + device_->CreateBuffer(&countDesc, nullptr, &drawCountBuffer_); + + // ── GPU Timestamp Queries ────────────────────────────────────── + GPUQueryHeapDesc queryDesc; + queryDesc.type = GpuQueryType::TIMESTAMP; + queryDesc.query_count = TS_COUNT; + device_->CreateQueryHeap(&queryDesc, ×tampHeap_); + + GPUBufferDesc readbackDesc; + readbackDesc.size = TS_COUNT * sizeof(uint64_t); + readbackDesc.usage = Usage::READBACK; + device_->CreateBuffer(&readbackDesc, nullptr, ×tampReadback_); + + // ── GPU Compute Mesher resources ───────────────────────────── + wi::renderer::LoadShader(ShaderStage::CS, meshShader_, "voxel/voxelMeshCS.cso"); + gpuMesherAvailable_ = meshShader_.IsValid(); + if (gpuMesherAvailable_) { + // Voxel data buffer: 1 chunk's worth (32^3 voxels / 2 per uint = 16384 uint) + GPUBufferDesc voxDesc; + voxDesc.size = (CHUNK_VOLUME / 2) * sizeof(uint32_t); + voxDesc.bind_flags = BindFlag::SHADER_RESOURCE; + voxDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED; + voxDesc.stride = sizeof(uint32_t); + voxDesc.usage = Usage::DEFAULT; + device_->CreateBuffer(&voxDesc, nullptr, &voxelDataBuffer_); + + // GPU quad output: same capacity as mega-buffer + GPUBufferDesc gpuQDesc; + gpuQDesc.size = MEGA_BUFFER_CAPACITY * sizeof(uint64_t); // PackedQuad = 8 bytes + gpuQDesc.bind_flags = BindFlag::UNORDERED_ACCESS; + gpuQDesc.misc_flags = ResourceMiscFlag::BUFFER_STRUCTURED; + gpuQDesc.stride = sizeof(uint64_t); // uint2 = 8 bytes + gpuQDesc.usage = Usage::DEFAULT; + device_->CreateBuffer(&gpuQDesc, nullptr, &gpuQuadBuffer_); + + // Quad counter + GPUBufferDesc cntDesc; + cntDesc.size = sizeof(uint32_t); + cntDesc.bind_flags = BindFlag::UNORDERED_ACCESS; + cntDesc.misc_flags = ResourceMiscFlag::BUFFER_RAW; + cntDesc.usage = Usage::DEFAULT; + 
device_->CreateBuffer(&cntDesc, nullptr, &gpuQuadCounter_); + + wi::backlog::post("VoxelRenderer: GPU compute mesher available"); + } else { + wi::backlog::post("VoxelRenderer: GPU compute mesher not available", wi::backlog::LogLevel::Warning); + } + + cpuMegaQuads_.reserve(MEGA_BUFFER_CAPACITY); + cpuChunkInfo_.reserve(MAX_CHUNKS); + chunkSlots_.reserve(MAX_CHUNKS); + cpuIndirectArgs_.reserve(MAX_CHUNKS); + + initialized_ = true; + wi::backlog::post("VoxelRenderer: initialized (mega-buffer: " + + std::to_string(MEGA_BUFFER_CAPACITY) + " quads capacity)"); +} + +void VoxelRenderer::shutdown() { + chunkSlots_.clear(); + cpuChunkInfo_.clear(); + cpuMegaQuads_.clear(); + initialized_ = false; +} + +void VoxelRenderer::createPipeline() { + // Constant buffer for per-frame data + GPUBufferDesc cbDesc; + cbDesc.size = sizeof(VoxelConstants); + cbDesc.bind_flags = BindFlag::CONSTANT_BUFFER; + cbDesc.usage = Usage::DEFAULT; + device_->CreateBuffer(&cbDesc, nullptr, &constantBuffer_); + + // Anisotropic wrap sampler + SamplerDesc samplerDesc; + samplerDesc.filter = Filter::ANISOTROPIC; + samplerDesc.address_u = TextureAddressMode::WRAP; + samplerDesc.address_v = TextureAddressMode::WRAP; + samplerDesc.address_w = TextureAddressMode::WRAP; + samplerDesc.max_anisotropy = 16; + device_->CreateSampler(&samplerDesc, &sampler_); + + // Load shaders + wi::renderer::LoadShader(ShaderStage::VS, vertexShader_, "voxel/voxelVS.cso"); + wi::renderer::LoadShader(ShaderStage::PS, pixelShader_, "voxel/voxelPS.cso"); + wi::renderer::LoadShader(ShaderStage::CS, cullShader_, "voxel/voxelCullCS.cso"); + + if (!vertexShader_.IsValid() || !pixelShader_.IsValid()) { + wi::backlog::post("VoxelRenderer: shader loading failed", wi::backlog::LogLevel::Error); + return; + } + gpuCullingEnabled_ = cullShader_.IsValid(); + if (!gpuCullingEnabled_) { + wi::backlog::post("VoxelRenderer: cull compute shader not available, using CPU culling", wi::backlog::LogLevel::Warning); + } else { + 
wi::backlog::post("VoxelRenderer: GPU frustum+backface culling enabled"); + } + + // Pipeline: backface cull, depth test, opaque blend, triangle list + PipelineStateDesc psoDesc; + psoDesc.vs = &vertexShader_; + psoDesc.ps = &pixelShader_; + psoDesc.rs = wi::renderer::GetRasterizerState(wi::enums::RSTYPE_FRONT); + psoDesc.dss = wi::renderer::GetDepthStencilState(wi::enums::DSSTYPE_DEFAULT); + psoDesc.bs = wi::renderer::GetBlendState(wi::enums::BSTYPE_OPAQUE); + psoDesc.pt = PrimitiveTopology::TRIANGLELIST; + + device_->CreatePipelineState(&psoDesc, &pso_); +} + +// ── Procedural texture generation ─────────────────────────────── + +static void generateNoiseTexture(uint8_t* pixels, int w, int h, + uint8_t r0, uint8_t g0, uint8_t b0, + uint8_t r1, uint8_t g1, uint8_t b1, + uint32_t seed) +{ + uint32_t s = seed; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + s = s * 1664525u + 1013904223u; + float noise = (float)(s & 0xFFFF) / 65535.0f; + float fx = (float)x / w; + float fy = (float)y / h; + float pattern = 0.5f + 0.5f * std::sin(fx * 20.0f + noise * 3.0f) * + std::cos(fy * 20.0f + noise * 3.0f); + float t = noise * 0.6f + pattern * 0.4f; + + int idx = (y * w + x) * 4; + pixels[idx + 0] = (uint8_t)(r0 + (r1 - r0) * t); + pixels[idx + 1] = (uint8_t)(g0 + (g1 - g0) * t); + pixels[idx + 2] = (uint8_t)(b0 + (b1 - b0) * t); + pixels[idx + 3] = 255; + } + } +} + +void VoxelRenderer::generateTextures() { + const int TEX_SIZE = 256; + const int NUM_MATERIALS = 5; + + std::vector allPixels(TEX_SIZE * TEX_SIZE * 4 * NUM_MATERIALS); + + struct MatColor { uint8_t r0,g0,b0, r1,g1,b1; uint32_t seed; }; + MatColor colors[NUM_MATERIALS] = { + { 60, 140, 40, 80, 180, 60, 101 }, // Grass + { 100, 70, 40, 140, 100, 60, 202 }, // Dirt + { 110, 110, 105, 140, 140, 130, 303 }, // Stone + { 200, 190, 140, 230, 220, 170, 404 }, // Sand + { 220, 225, 230, 245, 248, 252, 505 }, // Snow + }; + + for (int i = 0; i < NUM_MATERIALS; i++) { + auto& c = colors[i]; + 
generateNoiseTexture( + allPixels.data() + i * TEX_SIZE * TEX_SIZE * 4, + TEX_SIZE, TEX_SIZE, + c.r0, c.g0, c.b0, c.r1, c.g1, c.b1, c.seed + ); + } + + TextureDesc texDesc; + texDesc.type = TextureDesc::Type::TEXTURE_2D; + texDesc.width = TEX_SIZE; + texDesc.height = TEX_SIZE; + texDesc.array_size = NUM_MATERIALS; + texDesc.mip_levels = 1; + texDesc.format = Format::R8G8B8A8_UNORM; + texDesc.bind_flags = BindFlag::SHADER_RESOURCE; + texDesc.usage = Usage::DEFAULT; + + std::vector subData(NUM_MATERIALS); + for (int i = 0; i < NUM_MATERIALS; i++) { + subData[i].data_ptr = allPixels.data() + i * TEX_SIZE * TEX_SIZE * 4; + subData[i].row_pitch = TEX_SIZE * 4; + subData[i].slice_pitch = TEX_SIZE * TEX_SIZE * 4; + } + + device_->CreateTexture(&texDesc, subData.data(), &textureArray_); +} + +// ── Mega-buffer rebuild ───────────────────────────────────────── +// Packs all chunk quads contiguously into a single buffer. +// Simple strategy: full rebuild whenever any chunk is dirty. + +void VoxelRenderer::rebuildMegaBuffer(VoxelWorld& world) { + cpuMegaQuads_.clear(); + chunkSlots_.clear(); + cpuChunkInfo_.clear(); + + uint32_t offset = 0; + float debugFlag = debugFaceColors_ ? 
1.0f : 0.0f; + + world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) { + if (chunk.quadCount == 0) return; + if (offset + chunk.quadCount > MEGA_BUFFER_CAPACITY) return; // overflow guard + + ChunkSlot slot; + slot.pos = pos; + slot.quadOffset = offset; + slot.quadCount = chunk.quadCount; + chunkSlots_.push_back(slot); + + GPUChunkInfo info = {}; + info.worldPos = XMFLOAT4( + (float)(pos.x * CHUNK_SIZE), + (float)(pos.y * CHUNK_SIZE), + (float)(pos.z * CHUNK_SIZE), + debugFlag + ); + info.quadOffset = offset; + info.quadCount = chunk.quadCount; + for (int f = 0; f < 6; f++) { + info.faceOffsets[f] = chunk.faceOffsets[f]; + info.faceCounts[f] = chunk.faceCounts[f]; + } + cpuChunkInfo_.push_back(info); + + cpuMegaQuads_.insert(cpuMegaQuads_.end(), chunk.quads.begin(), chunk.quads.end()); + offset += chunk.quadCount; + }); + + chunkCount_ = (uint32_t)chunkSlots_.size(); + totalQuads_ = offset; +} + +void VoxelRenderer::updateMeshes(VoxelWorld& world) { + if (!device_) return; + + // Re-mesh dirty chunks + bool anyDirty = false; + world.forEachChunk([&](const ChunkPos& pos, Chunk& chunk) { + if (chunk.dirty) { + VoxelMesher::meshChunk(chunk, world); + anyDirty = true; + } + }); + + if (anyDirty || megaBufferDirty_) { + rebuildMegaBuffer(world); + megaBufferDirty_ = false; + } +} + +// ── Render pass ───────────────────────────────────────────────── + +void VoxelRenderer::render( + CommandList cmd, + const wi::scene::CameraComponent& camera, + const Texture& depthBuffer, + const Texture& renderTarget +) const { + if (!initialized_ || chunkCount_ == 0 || !pso_.IsValid()) return; + + auto* dev = device_; + + // Upload mega-buffer and chunk info to GPU + if (!cpuMegaQuads_.empty()) { + dev->UpdateBuffer(&megaQuadBuffer_, cpuMegaQuads_.data(), cmd, + cpuMegaQuads_.size() * sizeof(PackedQuad)); + } + if (!cpuChunkInfo_.empty()) { + dev->UpdateBuffer(&chunkInfoBuffer_, cpuChunkInfo_.data(), cmd, + cpuChunkInfo_.size() * sizeof(GPUChunkInfo)); + } + + // Per-frame 
constants + VoxelConstants cb = {}; + XMStoreFloat4x4(&cb.viewProjection, camera.GetViewProjection()); + cb.cameraPosition = XMFLOAT4(camera.Eye.x, camera.Eye.y, camera.Eye.z, 1.0f); + cb.sunDirection = XMFLOAT4(-0.5f, -0.8f, -0.3f, 0.0f); + cb.sunColor = XMFLOAT4(1.2f, 1.1f, 0.9f, 1.0f); + cb.chunkSize = (float)CHUNK_SIZE; + cb.textureTiling = 0.25f; + cb.chunkCount = chunkCount_; + dev->UpdateBuffer(&constantBuffer_, &cb, cmd, sizeof(cb)); + + // CPU frustum culling + wi::primitive::Frustum frustum; + frustum.Create(camera.GetViewProjection()); + + // ── Render pass: color + depth ──────────────────────────────── + RenderPassImage rp[] = { + RenderPassImage::RenderTarget( + &renderTarget, + RenderPassImage::LoadOp::CLEAR, + RenderPassImage::StoreOp::STORE, + ResourceState::SHADER_RESOURCE, + ResourceState::SHADER_RESOURCE + ), + RenderPassImage::DepthStencil( + &depthBuffer, + RenderPassImage::LoadOp::CLEAR, + RenderPassImage::StoreOp::STORE, + ResourceState::DEPTHSTENCIL, + ResourceState::DEPTHSTENCIL, + ResourceState::DEPTHSTENCIL + ), + }; + dev->RenderPassBegin(rp, 2, cmd); + + Viewport vp; + vp.width = (float)renderTarget.GetDesc().width; + vp.height = (float)renderTarget.GetDesc().height; + vp.min_depth = 0.0f; + vp.max_depth = 1.0f; + dev->BindViewports(1, &vp, cmd); + + Rect scissor = { 0, 0, (int)vp.width, (int)vp.height }; + dev->BindScissorRects(1, &scissor, cmd); + + dev->BindPipelineState(&pso_, cmd); + dev->BindConstantBuffer(&constantBuffer_, 0, cmd); + dev->BindResource(&megaQuadBuffer_, 0, cmd); // t0: mega quad buffer + dev->BindResource(&textureArray_, 1, cmd); // t1: material textures + dev->BindResource(&chunkInfoBuffer_, 2, cmd); // t2: chunk info + dev->BindSampler(&sampler_, 0, cmd); + + visibleChunks_ = 0; + drawCalls_ = 0; + + // Push constant structure (must be 48 bytes = 12 x uint32, matches b999) + struct VoxelPush { + uint32_t chunkIndex; + uint32_t quadOffset; // offset into mega quad buffer (in quads) + uint32_t pad[10]; + }; + + 
// Simple DrawInstanced loop with frustum culling + push constants + for (uint32_t i = 0; i < chunkCount_; i++) { + const auto& slot = chunkSlots_[i]; + if (slot.quadCount == 0) continue; + + XMFLOAT3 aabbMin( + (float)(slot.pos.x * CHUNK_SIZE), + (float)(slot.pos.y * CHUNK_SIZE), + (float)(slot.pos.z * CHUNK_SIZE) + ); + XMFLOAT3 aabbMax( + aabbMin.x + CHUNK_SIZE, + aabbMin.y + CHUNK_SIZE, + aabbMin.z + CHUNK_SIZE + ); + wi::primitive::AABB aabb(aabbMin, aabbMax); + if (!frustum.CheckBoxFast(aabb)) continue; + + visibleChunks_++; + + // Pass chunk index AND quad offset via push constants + // (SV_VertexID/SV_InstanceID offsets unreliable across drivers) + VoxelPush pushData = {}; + pushData.chunkIndex = i; + pushData.quadOffset = slot.quadOffset; + dev->PushConstants(&pushData, sizeof(pushData), cmd); + + // startVertexLocation = 0: the VS computes quad address from push.quadOffset + dev->DrawInstanced(slot.quadCount * 6, 1, 0, 0, cmd); + drawCalls_++; + } + + dev->RenderPassEnd(cmd); +} + +// ── VoxelRenderPath (custom RenderPath3D) ─────────────────────── + +void VoxelRenderPath::Start() { + RenderPath3D::Start(); + + auto* device = wi::graphics::GetDevice(); + renderer.initialize(device); + renderer.debugFaceColors_ = debugMode; + + // Generate world + if (debugMode) { + world.generateDebug(); + cameraPos = { 10.0f, 10.0f, 0.0f }; + cameraPitch = -0.4f; + cameraYaw = 0.5f; + } else { + world.generateAround(cameraPos.x, cameraPos.y, cameraPos.z, 4); + } + if (renderer.isInitialized()) { + renderer.updateMeshes(world); + } + worldGenerated_ = true; + + setAO(AO_DISABLED); + setFXAAEnabled(true); + setBloomEnabled(false); + + createRenderTargets(); +} + +void VoxelRenderPath::createRenderTargets() { + auto* device = wi::graphics::GetDevice(); + if (!device) return; + + uint32_t w = GetPhysicalWidth(); + uint32_t h = GetPhysicalHeight(); + if (w == 0 || h == 0) { w = 1920; h = 1080; } + + wi::graphics::TextureDesc rtDesc; + rtDesc.type = 
wi::graphics::TextureDesc::Type::TEXTURE_2D; + rtDesc.width = w; + rtDesc.height = h; + rtDesc.format = wi::graphics::Format::R8G8B8A8_UNORM; + rtDesc.bind_flags = wi::graphics::BindFlag::RENDER_TARGET | wi::graphics::BindFlag::SHADER_RESOURCE; + rtDesc.mip_levels = 1; + rtDesc.sample_count = 1; + rtDesc.layout = wi::graphics::ResourceState::SHADER_RESOURCE; + device->CreateTexture(&rtDesc, nullptr, &voxelRT_); + + wi::graphics::TextureDesc depthDesc; + depthDesc.type = wi::graphics::TextureDesc::Type::TEXTURE_2D; + depthDesc.width = w; + depthDesc.height = h; + depthDesc.format = wi::graphics::Format::D32_FLOAT; + depthDesc.bind_flags = wi::graphics::BindFlag::DEPTH_STENCIL | wi::graphics::BindFlag::SHADER_RESOURCE; + depthDesc.mip_levels = 1; + depthDesc.sample_count = 1; + depthDesc.layout = wi::graphics::ResourceState::DEPTHSTENCIL; + device->CreateTexture(&depthDesc, nullptr, &voxelDepth_); + + rtCreated_ = voxelRT_.IsValid() && voxelDepth_.IsValid(); + wi::backlog::post("VoxelRenderPath: render targets " + std::string(rtCreated_ ? 
"OK" : "FAILED") + + " (" + std::to_string(w) + "x" + std::to_string(h) + ")"); +} + +// ── WASD camera input ─────────────────────────────────────────── + +static constexpr wi::input::BUTTON KEY_W = (wi::input::BUTTON)(wi::input::CHARACTER_RANGE_START + ('W' - 'A')); +static constexpr wi::input::BUTTON KEY_A = (wi::input::BUTTON)(wi::input::CHARACTER_RANGE_START + ('A' - 'A')); +static constexpr wi::input::BUTTON KEY_S = (wi::input::BUTTON)(wi::input::CHARACTER_RANGE_START + ('S' - 'A')); +static constexpr wi::input::BUTTON KEY_D = (wi::input::BUTTON)(wi::input::CHARACTER_RANGE_START + ('D' - 'A')); + +void VoxelRenderPath::handleInput(float dt) { + if (wi::input::Press(wi::input::MOUSE_BUTTON_RIGHT)) { + mouseCaptured = !mouseCaptured; + wi::input::HidePointer(mouseCaptured); + } + + if (mouseCaptured) { + auto mouseState = wi::input::GetMouseState(); + cameraYaw += mouseState.delta_position.x * cameraSensitivity; + cameraPitch += mouseState.delta_position.y * cameraSensitivity; + cameraPitch = std::clamp(cameraPitch, -1.5f, 1.5f); + } + + float cosPitch = std::cos(cameraPitch); + XMFLOAT3 forward( + std::sin(cameraYaw) * cosPitch, + -std::sin(cameraPitch), + std::cos(cameraYaw) * cosPitch + ); + XMFLOAT3 right(std::cos(cameraYaw), 0.0f, -std::sin(cameraYaw)); + + float speed = cameraSpeed * dt; + if (wi::input::Down(wi::input::KEYBOARD_BUTTON_LSHIFT)) speed *= 3.0f; + + if (wi::input::Down(KEY_W)) { cameraPos.x += forward.x * speed; cameraPos.y += forward.y * speed; cameraPos.z += forward.z * speed; } + if (wi::input::Down(KEY_S)) { cameraPos.x -= forward.x * speed; cameraPos.y -= forward.y * speed; cameraPos.z -= forward.z * speed; } + if (wi::input::Down(KEY_A)) { cameraPos.x -= right.x * speed; cameraPos.z -= right.z * speed; } + if (wi::input::Down(KEY_D)) { cameraPos.x += right.x * speed; cameraPos.z += right.z * speed; } + if (wi::input::Down(wi::input::KEYBOARD_BUTTON_SPACE)) cameraPos.y += speed; + if 
(wi::input::Down(wi::input::KEYBOARD_BUTTON_LCONTROL)) cameraPos.y -= speed; + + camera->Eye = cameraPos; + camera->At = forward; + camera->Up = XMFLOAT3(0, 1, 0); + camera->UpdateCamera(); +} + +void VoxelRenderPath::Update(float dt) { + lastDt_ = dt; + float instantFps = (dt > 0.0f) ? (1.0f / dt) : 0.0f; + smoothFps_ = smoothFps_ * 0.95f + instantFps * 0.05f; + if (camera) handleInput(dt); + if (renderer.isInitialized()) renderer.updateMeshes(world); + RenderPath3D::Update(dt); +} + +void VoxelRenderPath::Render() const { + RenderPath3D::Render(); + + if (renderer.isInitialized() && camera && rtCreated_) { + auto* device = wi::graphics::GetDevice(); + CommandList cmd = device->BeginCommandList(); + renderer.render(cmd, *camera, voxelDepth_, voxelRT_); + } +} + +void VoxelRenderPath::Compose(CommandList cmd) const { + frameCount_++; + + RenderPath3D::Compose(cmd); + + if (rtCreated_ && voxelRT_.IsValid()) { + wi::image::Params fx; + fx.enableFullScreen(); + fx.blendFlag = wi::enums::BLENDMODE_OPAQUE; + wi::image::Draw(&voxelRT_, fx, cmd); + } + + // HUD overlay + wi::font::Params fp; + fp.posX = 10; fp.posY = 10; fp.size = 20; + fp.color = wi::Color(255, 255, 255, 230); + fp.shadowColor = wi::Color(0, 0, 0, 180); + + char fpsStr[16]; + snprintf(fpsStr, sizeof(fpsStr), "%.1f", smoothFps_); + char dtStr[16]; + snprintf(dtStr, sizeof(dtStr), "%.2f", lastDt_ * 1000.0f); + + std::string stats = "BVLE Voxel Engine (Phase 2 — GPU-driven)\n"; + stats += "FPS: " + std::string(fpsStr) + " (" + std::string(dtStr) + " ms)\n"; + if (debugMode) { + stats += "=== DEBUG FACE MODE ===\n"; + stats += "+X=Red -X=DkRed +Y=Green -Y=DkGreen +Z=Blue -Z=DkBlue\n"; + } + stats += "Chunks: " + std::to_string(renderer.getVisibleChunks()) + + "/" + std::to_string(renderer.getChunkCount()) + "\n"; + stats += "Quads: " + std::to_string(renderer.getTotalQuads()) + "\n"; + stats += "Draw Calls: " + std::to_string(renderer.getDrawCalls()) + + " (DrawInstanced + CPU cull + backface)\n"; + + char 
cullStr[16], drawStr[16]; + snprintf(cullStr, sizeof(cullStr), "%.3f", renderer.getGpuCullTimeMs()); + snprintf(drawStr, sizeof(drawStr), "%.3f", renderer.getGpuDrawTimeMs()); + stats += "GPU Cull: " + std::string(cullStr) + " ms | Draw: " + std::string(drawStr) + " ms\n"; + stats += "WASD+Space/Ctrl: move | Shift: fast | Right-click: capture mouse"; + + wi::font::Draw(stats, fp, cmd); +} + +} // namespace voxel diff --git a/src/voxel/VoxelRenderer.h b/src/voxel/VoxelRenderer.h new file mode 100644 index 0000000..f0aee14 --- /dev/null +++ b/src/voxel/VoxelRenderer.h @@ -0,0 +1,180 @@ +#pragma once +#include "VoxelWorld.h" +#include "VoxelMesher.h" +#include "WickedEngine.h" + +namespace voxel { + +// ── GPU-visible chunk info (must match HLSL GPUChunkInfo) ──────── +struct GPUChunkInfo { + XMFLOAT4 worldPos; // xyz = chunk origin, w = debug flag + uint32_t quadOffset; // offset into mega quad buffer + uint32_t quadCount; // number of quads for this chunk + uint32_t pad[2]; // align to 32 bytes + uint32_t faceOffsets[6]; // per-face quad offset within this chunk's quads + uint32_t faceCounts[6]; // per-face quad count +}; + +// ── Voxel Renderer (Phase 2: mega-buffer + MDI pipeline) ──────── +class VoxelRenderer { +public: + VoxelRenderer(); + ~VoxelRenderer(); + + void initialize(wi::graphics::GraphicsDevice* device); + void shutdown(); + + // Mesh dirty chunks and repack the mega-buffer + void updateMeshes(VoxelWorld& world); + + // Render all visible chunks + void render( + wi::graphics::CommandList cmd, + const wi::scene::CameraComponent& camera, + const wi::graphics::Texture& depthBuffer, + const wi::graphics::Texture& renderTarget + ) const; + + // Generate procedural textures for materials + void generateTextures(); + + // Stats + uint32_t getTotalQuads() const { return totalQuads_; } + uint32_t getVisibleChunks() const { return visibleChunks_; } + uint32_t getDrawCalls() const { return drawCalls_; } + uint32_t getChunkCount() const { return chunkCount_; } + 
bool isInitialized() const { return initialized_; }
+    bool isGpuCulling() const { return gpuCullingEnabled_; }
+
+    bool debugFaceColors_ = false;
+
+private:
+    void createPipeline();
+    void rebuildMegaBuffer(VoxelWorld& world);
+
+    wi::graphics::GraphicsDevice* device_ = nullptr;
+
+    // Shaders & Pipeline
+    wi::graphics::Shader vertexShader_;
+    wi::graphics::Shader pixelShader_;
+    wi::graphics::PipelineState pso_;
+    wi::graphics::Shader cullShader_; // Frustum cull compute shader
+
+    // Texture array for materials (256x256, 5 layers for prototype)
+    wi::graphics::Texture textureArray_;
+    wi::graphics::Sampler sampler_;
+
+    // ── Mega-buffer architecture (Phase 2) ──────────────────────
+    static constexpr uint32_t MEGA_BUFFER_CAPACITY = 2 * 1024 * 1024; // 2M quads max (16 MB)
+    static constexpr uint32_t MAX_CHUNKS = 2048;
+    static constexpr uint32_t MAX_DRAWS = MAX_CHUNKS * 6; // up to 6 face groups per chunk
+
+    wi::graphics::GPUBuffer megaQuadBuffer_;  // StructuredBuffer<PackedQuad>, SRV t0
+    wi::graphics::GPUBuffer chunkInfoBuffer_; // StructuredBuffer<GPUChunkInfo>, SRV t2
+
+    // CPU-side tracking
+    struct ChunkSlot {
+        ChunkPos pos;
+        uint32_t quadOffset; // offset into mega-buffer (in quads)
+        uint32_t quadCount;
+    };
+    std::vector<ChunkSlot> chunkSlots_;      // one slot per packed chunk, in mega-buffer order
+    std::vector<GPUChunkInfo> cpuChunkInfo_; // same order as chunkSlots_; uploaded to chunkInfoBuffer_
+    std::vector<PackedQuad> cpuMegaQuads_;   // CPU staging for mega-buffer
+    uint32_t chunkCount_ = 0;
+    bool megaBufferDirty_ = true; // forces a repack on the next updateMeshes()
+
+    // ── Indirect draw (Phase 2 MDI) ─────────────────────────────
+    // IndirectDrawArgsInstanced: { vertexCount, instanceCount, startVertex, startInstance }
+    struct IndirectDrawArgs {
+        uint32_t vertexCountPerInstance;
+        uint32_t instanceCount;
+        uint32_t startVertexLocation;
+        uint32_t startInstanceLocation;
+    };
+    wi::graphics::GPUBuffer indirectArgsBuffer_; // IndirectDrawArgs[MAX_DRAWS]
+    wi::graphics::GPUBuffer drawCountBuffer_;    // uint32_t[1]
+    mutable std::vector<IndirectDrawArgs> cpuIndirectArgs_;
+    bool gpuCullingEnabled_ = false; // GPU compute cull vs CPU fallback
+
+    // Constants buffer (must match
HLSL VoxelCB) + struct VoxelConstants { + XMFLOAT4X4 viewProjection; + XMFLOAT4 cameraPosition; + XMFLOAT4 sunDirection; + XMFLOAT4 sunColor; + float chunkSize; + float textureTiling; + float _pad[2]; + XMFLOAT4 frustumPlanes[6]; // ax+by+cz+d=0 + uint32_t chunkCount; + uint32_t _cullPad0; + uint32_t _cullPad1; + uint32_t _cullPad2; + }; + wi::graphics::GPUBuffer constantBuffer_; + + // ── GPU Compute Mesher (Phase 2 benchmark) ───────────────────── + wi::graphics::Shader meshShader_; // voxelMeshCS compute shader + wi::graphics::GPUBuffer voxelDataBuffer_; // chunk voxel data (StructuredBuffer) + wi::graphics::GPUBuffer gpuQuadBuffer_; // GPU mesh output (RWStructuredBuffer) + wi::graphics::GPUBuffer gpuQuadCounter_; // atomic counter for GPU mesh output + bool gpuMesherAvailable_ = false; + + // ── GPU Timestamp Queries (Phase 2 benchmark) ──────────────── + wi::graphics::GPUQueryHeap timestampHeap_; + wi::graphics::GPUBuffer timestampReadback_; + static constexpr uint32_t TS_CULL_BEGIN = 0; + static constexpr uint32_t TS_CULL_END = 1; + static constexpr uint32_t TS_DRAW_BEGIN = 2; + static constexpr uint32_t TS_DRAW_END = 3; + static constexpr uint32_t TS_COUNT = 4; + mutable float gpuCullTimeMs_ = 0.0f; + mutable float gpuDrawTimeMs_ = 0.0f; + + // Stats (mutable: updated during const Render() call) + mutable uint32_t totalQuads_ = 0; + mutable uint32_t visibleChunks_ = 0; + mutable uint32_t drawCalls_ = 0; + + bool initialized_ = false; + +public: + float getGpuCullTimeMs() const { return gpuCullTimeMs_; } + float getGpuDrawTimeMs() const { return gpuDrawTimeMs_; } +}; + +// ── Custom RenderPath that integrates voxel rendering ─────────── +class VoxelRenderPath : public wi::RenderPath3D { +public: + VoxelWorld world; + VoxelRenderer renderer; + + bool debugMode = false; + + float cameraSpeed = 50.0f; + float cameraSensitivity = 0.003f; + XMFLOAT3 cameraPos = { 256.0f, 100.0f, 256.0f }; + float cameraPitch = -0.3f; + float cameraYaw = 0.0f; + bool mouseCaptured 
= false;
+
+    void Start() override;
+    void Update(float dt) override;
+    void Render() const override;
+    void Compose(wi::graphics::CommandList cmd) const override;
+
+private:
+    void handleInput(float dt);
+    void createRenderTargets();
+    mutable bool worldGenerated_ = false;
+    mutable int frameCount_ = 0;      // incremented once per Compose() call
+    mutable float lastDt_ = 0.016f;   // last frame delta in seconds (HUD prints it in ms)
+    mutable float smoothFps_ = 60.0f; // exponentially smoothed FPS shown in the HUD
+
+    wi::graphics::Texture voxelRT_;
+    wi::graphics::Texture voxelDepth_;
+    mutable bool rtCreated_ = false;
+};
+
+} // namespace voxel
diff --git a/src/voxel/VoxelTypes.h b/src/voxel/VoxelTypes.h
new file mode 100644
index 0000000..8214663
--- /dev/null
+++ b/src/voxel/VoxelTypes.h
@@ -0,0 +1,118 @@
+#pragma once
+#include <cstddef>
+#include <cstdint>
+
+namespace voxel {
+
+// ── Voxel Data (16 bits per voxel, as per spec) ─────────────────
+// Layout: 8 bits material ID | 4 bits flags | 4 bits metadata
+struct VoxelData {
+    uint16_t packed = 0;
+
+    VoxelData() = default;
+    explicit VoxelData(uint8_t materialID, uint8_t flags = 0, uint8_t meta = 0) {
+        packed = (uint16_t(materialID) << 8) | (uint16_t(flags & 0xF) << 4) | (meta & 0xF);
+    }
+
+    uint8_t getMaterialID() const { return uint8_t(packed >> 8); }
+    uint8_t getFlags() const { return uint8_t((packed >> 4) & 0xF); }
+    uint8_t getMetadata() const { return uint8_t(packed & 0xF); }
+
+    bool isEmpty() const { return packed == 0; } // true only when material, flags and metadata are all 0
+    bool isSmooth() const { return (getFlags() & FLAG_SMOOTH) != 0; }
+    bool isTransparent() const { return (getFlags() & FLAG_TRANSPARENT) != 0; }
+    bool isEmissive() const { return (getFlags() & FLAG_EMISSIVE) != 0; }
+
+    static constexpr uint8_t FLAG_SMOOTH = 0x1;
+    static constexpr uint8_t FLAG_TRANSPARENT = 0x2;
+    static constexpr uint8_t FLAG_EMISSIVE = 0x4;
+    static constexpr uint8_t FLAG_CUSTOM = 0x8;
+};
+
+// ── Chunk Constants ─────────────────────────────────────────────
+static constexpr int CHUNK_SIZE = 32;
+static constexpr int CHUNK_VOLUME = CHUNK_SIZE * CHUNK_SIZE * CHUNK_SIZE;
+
+// ── Packed Vertex for Greedy Mesh
Quads (8 bytes per quad) ────── +// Layout per spec: +// 6 bits posX | 6 bits posY | 6 bits posZ | +// 6 bits width | 6 bits height | 3 bits face | +// 8 bits materialID | 8 bits AO | 15 bits flags +struct PackedQuad { + uint64_t data; + + static PackedQuad create( + uint8_t x, uint8_t y, uint8_t z, + uint8_t w, uint8_t h, uint8_t face, + uint8_t materialID, uint8_t ao = 0, uint16_t flags = 0 + ) { + PackedQuad q; + q.data = + (uint64_t(x & 0x3F)) | + (uint64_t(y & 0x3F) << 6) | + (uint64_t(z & 0x3F) << 12) | + (uint64_t(w & 0x3F) << 18) | + (uint64_t(h & 0x3F) << 24) | + (uint64_t(face & 0x7) << 30) | + (uint64_t(materialID) << 33) | + (uint64_t(ao) << 41) | + (uint64_t(flags & 0x7FFF) << 49); + return q; + } + + uint8_t getX() const { return uint8_t(data & 0x3F); } + uint8_t getY() const { return uint8_t((data >> 6) & 0x3F); } + uint8_t getZ() const { return uint8_t((data >> 12) & 0x3F); } + uint8_t getWidth() const { return uint8_t((data >> 18) & 0x3F); } + uint8_t getHeight() const { return uint8_t((data >> 24) & 0x3F); } + uint8_t getFace() const { return uint8_t((data >> 30) & 0x7); } + uint8_t getMaterialID() const { return uint8_t((data >> 33) & 0xFF); } + uint8_t getAO() const { return uint8_t((data >> 41) & 0xFF); } + uint16_t getFlags() const { return uint16_t((data >> 49) & 0x7FFF); } +}; + +// Face directions: +X, -X, +Y, -Y, +Z, -Z +enum Face : uint8_t { + FACE_POS_X = 0, + FACE_NEG_X = 1, + FACE_POS_Y = 2, + FACE_NEG_Y = 3, + FACE_POS_Z = 4, + FACE_NEG_Z = 5, + FACE_COUNT = 6 +}; + +// ── Material Descriptor ───────────────────────────────────────── +struct MaterialDesc { + uint16_t albedoTextureIndex = 0; + uint16_t normalTextureIndex = 0; + uint16_t heightmapTextureIndex = 0; + uint8_t roughness = 128; // 0-255 mapped to 0.0-1.0 + uint8_t metallic = 0; + uint8_t flags = 0; // triplanar, blend mode, etc. 
+    uint8_t _pad = 0;
+
+    static constexpr uint8_t FLAG_TRIPLANAR = 0x1;
+};
+
+// ── Chunk Position Hash ─────────────────────────────────────────
+struct ChunkPos {
+    int32_t x, y, z;
+
+    bool operator==(const ChunkPos& other) const {
+        return x == other.x && y == other.y && z == other.z;
+    }
+};
+
+struct ChunkPosHash {
+    size_t operator()(const ChunkPos& p) const {
+        // FNV-1a inspired hash: xor one coordinate in, multiply, repeat for x, y, z
+        size_t h = 0x811c9dc5;
+        h ^= size_t(p.x); h *= 0x01000193;
+        h ^= size_t(p.y); h *= 0x01000193;
+        h ^= size_t(p.z); h *= 0x01000193;
+        return h;
+    }
+};
+
+} // namespace voxel
diff --git a/src/voxel/VoxelWorld.cpp b/src/voxel/VoxelWorld.cpp
new file mode 100644
index 0000000..a290512
--- /dev/null
+++ b/src/voxel/VoxelWorld.cpp
@@ -0,0 +1,282 @@
+#include "VoxelWorld.h"
+#include <cmath>
+#include <cstring>
+
+namespace voxel {
+
+VoxelWorld::VoxelWorld() {
+    setupDefaultMaterials();
+}
+
+VoxelWorld::~VoxelWorld() = default;
+
+void VoxelWorld::setupDefaultMaterials() {
+    // Material 0: Air (empty, never rendered)
+    // Material 1: Grass
+    materials[1].albedoTextureIndex = 0;
+    materials[1].roughness = 200;
+    materials[1].flags = MaterialDesc::FLAG_TRIPLANAR;
+    // Material 2: Dirt
+    materials[2].albedoTextureIndex = 1;
+    materials[2].roughness = 220;
+    materials[2].flags = MaterialDesc::FLAG_TRIPLANAR;
+    // Material 3: Stone
+    materials[3].albedoTextureIndex = 2;
+    materials[3].roughness = 180;
+    materials[3].flags = MaterialDesc::FLAG_TRIPLANAR;
+    // Material 4: Sand
+    materials[4].albedoTextureIndex = 3;
+    materials[4].roughness = 230;
+    materials[4].flags = MaterialDesc::FLAG_TRIPLANAR;
+    // Material 5: Snow
+    materials[5].albedoTextureIndex = 4;
+    materials[5].roughness = 150;
+    materials[5].flags = MaterialDesc::FLAG_TRIPLANAR;
+}
+
+// ── Permutation-based noise (no external dependency) ────────────
+
+static constexpr int PERM_SIZE = 256;
+static uint8_t perm[512];
+static uint32_t permSeed = 0;
+static bool permInitialized = false;
+
+static void initPerm(uint32_t seed) {
+    if
(permInitialized && permSeed == seed) return;
+    for (int i = 0; i < PERM_SIZE; i++) perm[i] = (uint8_t)i;
+    // Fisher-Yates shuffle with seed
+    uint32_t s = seed;
+    for (int i = PERM_SIZE - 1; i > 0; i--) {
+        s = s * 1664525u + 1013904223u; // LCG
+        int j = s % (i + 1);
+        uint8_t tmp = perm[i];
+        perm[i] = perm[j];
+        perm[j] = tmp;
+    }
+    for (int i = 0; i < PERM_SIZE; i++) perm[i + PERM_SIZE] = perm[i]; // mirror so lookups up to index 511 need no wrap
+    permSeed = seed;
+    permInitialized = true;
+}
+
+static float fade(float t) { return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); } // 6t^5 - 15t^4 + 10t^3
+static float lerp(float a, float b, float t) { return a + t * (b - a); }
+
+static float grad(int hash, float x, float y, float z) { // signed sum of two of (x, y, z), selected by the low 4 bits of hash
+    int h = hash & 15;
+    float u = h < 8 ? x : y;
+    float v = h < 4 ? y : (h == 12 || h == 14 ? x : z);
+    return ((h & 1) ? -u : u) + ((h & 2) ? -v : v);
+}
+
+float VoxelWorld::noise3D(float x, float y, float z) const { // gradient noise over the seed_-shuffled permutation table
+    initPerm(seed_);
+    int X = (int)std::floor(x) & 255; // lattice cell, wrapped to the 256-entry table
+    int Y = (int)std::floor(y) & 255;
+    int Z = (int)std::floor(z) & 255;
+    x -= std::floor(x); // position inside the cell, in [0, 1)
+    y -= std::floor(y);
+    z -= std::floor(z);
+    float u = fade(x), v = fade(y), w = fade(z);
+
+    int A = perm[X] + Y;
+    int AA = perm[A] + Z;
+    int AB = perm[A + 1] + Z;
+    int B = perm[X + 1] + Y;
+    int BA = perm[B] + Z;
+    int BB = perm[B + 1] + Z;
+
+    return lerp(
+        lerp(lerp(grad(perm[AA], x, y, z), grad(perm[BA], x-1, y, z), u),
+             lerp(grad(perm[AB], x, y-1, z), grad(perm[BB], x-1, y-1, z), u), v),
+        lerp(lerp(grad(perm[AA+1], x, y, z-1), grad(perm[BA+1], x-1, y, z-1), u),
+             lerp(grad(perm[AB+1], x, y-1, z-1), grad(perm[BB+1], x-1, y-1, z-1), u), v),
+        w);
+}
+
+float VoxelWorld::fbm(float x, float y, float z, int octaves) const { // sum of `octaves` noise3D layers, divided by the total amplitude
+    float value = 0.0f;
+    float amplitude = 1.0f;
+    float frequency = 1.0f;
+    float maxVal = 0.0f;
+    for (int i = 0; i < octaves; i++) {
+        value += amplitude * noise3D(x * frequency, y * frequency, z * frequency);
+        maxVal += amplitude;
+        amplitude *= 0.5f; // each octave has half the amplitude...
+        frequency *= 2.0f; // ...and twice the frequency of the previous one
+    }
+    return (maxVal > 0.0f) ? value / maxVal : 0.0f; // octaves <= 0 used to yield 0/0 = NaN
+}
+
+void
VoxelWorld::generateChunk(Chunk& chunk) {
+    const float scale = 0.02f; // terrain horizontal scale
+    const float heightScale = 64.0f;
+    const float baseHeight = 40.0f;
+    const float caveScale = 0.05f;
+    const float caveThreshold = 0.3f;
+
+    for (int z = 0; z < CHUNK_SIZE; z++) {
+        for (int x = 0; x < CHUNK_SIZE; x++) {
+            // World-space coordinates
+            float wx = (float)(chunk.pos.x * CHUNK_SIZE + x);
+            float wz = (float)(chunk.pos.z * CHUNK_SIZE + z);
+
+            // Heightmap using fBm
+            float height = baseHeight + heightScale * fbm(wx * scale, 0.0f, wz * scale, 5);
+
+            for (int y = 0; y < CHUNK_SIZE; y++) {
+                float wy = (float)(chunk.pos.y * CHUNK_SIZE + y);
+                VoxelData v;
+
+                if (wy > height) {
+                    // Air above terrain
+                    v = VoxelData();
+                } else {
+                    // Cave generation: sample the 3D noise only inside the band where caves may be carved
+                    const bool inCaveBand = wy > 10.0f && wy < height - 3.0f;
+                    if (inCaveBand && std::abs(fbm(wx * caveScale, wy * caveScale, wz * caveScale, 3)) < caveThreshold) {
+                        v = VoxelData(); // Cave
+                    } else if (wy > height - 1.0f) {
+                        // Surface layer: material depends on height
+                        if (wy > 90.0f) {
+                            v = VoxelData(5); // Snow
+                        } else if (wy > 70.0f) {
+                            v = VoxelData(3); // Stone
+                        } else if (wy < 25.0f) {
+                            v = VoxelData(4); // Sand
+                        } else {
+                            v = VoxelData(1); // Grass
+                        }
+                    } else if (wy > height - 4.0f) {
+                        v = VoxelData(2); // Dirt
+                    } else {
+                        v = VoxelData(3); // Stone
+                    }
+                }
+
+                chunk.at(x, y, z) = v;
+            }
+        }
+    }
+
+    chunk.dirty = true;
+}
+
+void VoxelWorld::generateAround(float cx, float cy, float cz, int radiusChunks) {
+    int ccx = (int)std::floor(cx / CHUNK_SIZE);
+    (void)cy; // vertical extent is the fixed layer range below, so the centre height is not used
+    int ccz = (int)std::floor(cz / CHUNK_SIZE);
+
+    for (int dz = -radiusChunks; dz <= radiusChunks; dz++) {
+        for (int dx = -radiusChunks; dx <= radiusChunks; dx++) {
+            // Y range: always chunk layers 0..7 (world y in [0, 8 * CHUNK_SIZE)), independent of cy
+            for (int dy = 0; dy < 8; dy++) {
+                ChunkPos pos = { ccx + dx, dy, ccz + dz };
+                if (chunks_.find(pos) == chunks_.end()) {
+                    auto chunk = std::make_unique<Chunk>();
chunk->pos = pos; + generateChunk(*chunk); + chunks_[pos] = std::move(chunk); + } + } + } + } +} + +Chunk* VoxelWorld::getChunk(const ChunkPos& pos) { + auto it = chunks_.find(pos); + return it != chunks_.end() ? it->second.get() : nullptr; +} + +const Chunk* VoxelWorld::getChunk(const ChunkPos& pos) const { + auto it = chunks_.find(pos); + return it != chunks_.end() ? it->second.get() : nullptr; +} + +VoxelData VoxelWorld::getVoxel(int wx, int wy, int wz) const { + // Integer floor division that works for negatives + auto floorDiv = [](int a, int b) -> int { + return (a >= 0) ? (a / b) : ((a - b + 1) / b); + }; + auto floorMod = [](int a, int b) -> int { + int r = a % b; + return (r < 0) ? r + b : r; + }; + + ChunkPos cp = { + floorDiv(wx, CHUNK_SIZE), + floorDiv(wy, CHUNK_SIZE), + floorDiv(wz, CHUNK_SIZE) + }; + const Chunk* chunk = getChunk(cp); + if (!chunk) return VoxelData(); + + return chunk->at( + floorMod(wx, CHUNK_SIZE), + floorMod(wy, CHUNK_SIZE), + floorMod(wz, CHUNK_SIZE) + ); +} + +void VoxelWorld::setVoxel(int wx, int wy, int wz, VoxelData v) { + auto floorDiv = [](int a, int b) -> int { + return (a >= 0) ? (a / b) : ((a - b + 1) / b); + }; + auto floorMod = [](int a, int b) -> int { + int r = a % b; + return (r < 0) ? 
r + b : r;
+    };
+
+    ChunkPos cp = {
+        floorDiv(wx, CHUNK_SIZE),
+        floorDiv(wy, CHUNK_SIZE),
+        floorDiv(wz, CHUNK_SIZE)
+    };
+    Chunk* chunk = getChunk(cp);
+    if (!chunk) return; // writes into chunks that are not loaded are dropped
+
+    chunk->at(
+        floorMod(wx, CHUNK_SIZE),
+        floorMod(wy, CHUNK_SIZE),
+        floorMod(wz, CHUNK_SIZE)
+    ) = v;
+    chunk->dirty = true;
+}
+
+void VoxelWorld::generateDebug() {
+    chunks_.clear();
+
+    // Create a single chunk at origin
+    ChunkPos cp = {0, 0, 0};
+    auto chunk = std::make_unique<Chunk>();
+    chunk->pos = cp;
+    std::memset(chunk->voxels, 0, sizeof(chunk->voxels));
+
+    VoxelData stone(3); // material 3 = stone
+
+    // Block 1: single isolated block at (5, 5, 5)
+    //   → should show all 6 faces
+    chunk->at(5, 5, 5) = stone;
+
+    // Block 2: 2x1x1 bar at (12, 5, 5) along X
+    //   → internal faces should be culled
+    chunk->at(12, 5, 5) = stone;
+    chunk->at(13, 5, 5) = stone;
+
+    // Block 3: L-shape at (5, 5, 12)
+    chunk->at(5, 5, 12) = stone;
+    chunk->at(6, 5, 12) = stone;
+    chunk->at(5, 5, 13) = stone;
+
+    // Block 4: 3-high column at (12, 5, 12)
+    chunk->at(12, 5, 12) = stone;
+    chunk->at(12, 6, 12) = stone;
+    chunk->at(12, 7, 12) = stone;
+
+    // Block 5: single block at (20, 5, 5) with material 1 (grass)
+    chunk->at(20, 5, 5) = VoxelData(1);
+
+    chunk->dirty = true;
+    chunks_[cp] = std::move(chunk);
+}
+
+} // namespace voxel
diff --git a/src/voxel/VoxelWorld.h b/src/voxel/VoxelWorld.h
new file mode 100644
index 0000000..2392298
--- /dev/null
+++ b/src/voxel/VoxelWorld.h
@@ -0,0 +1,87 @@
+#pragma once
+#include "VoxelTypes.h"
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+namespace voxel {
+
+// ── Chunk ───────────────────────────────────────────────────────
+struct Chunk {
+    VoxelData voxels[CHUNK_VOLUME];
+    ChunkPos pos;
+    bool dirty = true;
+
+    // Mesh data (output of greedy mesher, sorted by face ID 0-5)
+    std::vector<PackedQuad> quads;
+    uint32_t quadCount = 0;
+    uint32_t faceOffsets[6] = {}; // offset (in quads) for each face group within quads[]
+    uint32_t faceCounts[6] = {}; // number of quads per face
group
+
+    VoxelData& at(int x, int y, int z) { // x varies fastest, then y, then z; no bounds check
+        return voxels[x + y * CHUNK_SIZE + z * CHUNK_SIZE * CHUNK_SIZE];
+    }
+
+    const VoxelData& at(int x, int y, int z) const {
+        return voxels[x + y * CHUNK_SIZE + z * CHUNK_SIZE * CHUNK_SIZE];
+    }
+
+    bool isInBounds(int x, int y, int z) const {
+        return x >= 0 && x < CHUNK_SIZE &&
+               y >= 0 && y < CHUNK_SIZE &&
+               z >= 0 && z < CHUNK_SIZE;
+    }
+};
+
+// ── World ───────────────────────────────────────────────────────
+class VoxelWorld {
+public:
+    VoxelWorld();
+    ~VoxelWorld();
+
+    // Generate a procedural world around a center position
+    void generateAround(float cx, float cy, float cz, int radiusChunks);
+
+    // Generate debug world: isolated blocks for face visibility testing
+    void generateDebug();
+
+    // Get a chunk (nullptr if not loaded)
+    Chunk* getChunk(const ChunkPos& pos);
+    const Chunk* getChunk(const ChunkPos& pos) const;
+
+    // Get voxel at world position (handles cross-chunk lookup)
+    VoxelData getVoxel(int wx, int wy, int wz) const;
+    void setVoxel(int wx, int wy, int wz, VoxelData v);
+
+    // Iterate all chunks; fn is called as fn(const ChunkPos&, Chunk&)
+    template <typename Fn>
+    void forEachChunk(Fn&& fn) {
+        for (auto& [pos, chunk] : chunks_) {
+            fn(pos, *chunk);
+        }
+    }
+
+    template <typename Fn>
+    void forEachChunk(Fn&& fn) const {
+        for (auto& [pos, chunk] : chunks_) {
+            fn(pos, *chunk); // NOTE(review): unique_ptr does not propagate const, so fn still receives a mutable Chunk&
+        }
+    }
+
+    size_t chunkCount() const { return chunks_.size(); }
+
+    // Material palette
+    MaterialDesc materials[256];
+    void setupDefaultMaterials();
+
+private:
+    void generateChunk(Chunk& chunk);
+    float noise3D(float x, float y, float z) const;
+    float fbm(float x, float y, float z, int octaves) const;
+
+    std::unordered_map<ChunkPos, std::unique_ptr<Chunk>, ChunkPosHash> chunks_;
+    uint32_t seed_ = 42;
+};
+
+} // namespace voxel
diff --git a/voxel_engine_spec.docx b/voxel_engine_spec.docx
new file mode 100644
index 0000000..7e2b89a
Binary files /dev/null and b/voxel_engine_spec.docx differ