Chessistics/tools/automation/harness.py

321 lines
10 KiB
Python
Raw Permalink Normal View History

"""
Thin Python wrapper around the file-based Chessistics automation IPC.
Usage:

    from harness import Harness

    with Harness.launch() as h:
        h.load_mission("campaign_01", 0)
        h.screenshot("00_initial")
        print(h.state()["phase"])
        h.place("Rook", (0, 0), (0, 3))
        h.step()
        h.screenshot("01_after_step")

No third-party dependencies — stdlib only.
"""
from __future__ import annotations
# Guard against user-site `json_extensions` namespace packages that shadow the
# stdlib json module. Harmless if nothing is shadowing.
#
# This must run BEFORE the `import json` further down: it edits sys.modules
# and sys.path so that the later import resolves against the cleaned state.
import sys as _sys

# Look only at modules already registered as `json` or `json.<something>`.
# The candidate names are snapshotted into a list first because entries are
# deleted from sys.modules inside the loop.
for _k in [k for k in list(_sys.modules) if k == "json" or k.startswith("json.")]:
    if _sys.modules[_k].__file__ is None:  # namespace package → purge
        del _sys.modules[_k]

# Drop every sys.path entry containing "Roaming\Python" or "Roaming/Python"
# (presumably the Windows per-user site-packages directory — confirm), so the
# shadowing package cannot be re-imported. The list is mutated in place
# (slice assignment) so other references to sys.path see the change.
_sys.path[:] = [p for p in _sys.path if "Roaming\\Python" not in p and "Roaming/Python" not in p]
import json
import os
import subprocess
import sys
import time
import uuid
from contextlib import contextmanager
from pathlib import Path
from typing import Any
# Resolve defaults relative to the repo root (parent of tools/).
# This file lives at <repo>/tools/automation/harness.py, so parents[2] of the
# resolved file path is <repo>.
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Parent directory under which Harness.launch() creates one IPC root per run.
_DEFAULT_RUNS = _REPO_ROOT / ".automation_runs"
Headless Linux dev container: Godot + .NET + Xvfb for autonomous testing Claude Code running inside the project's dev container can now build the game, launch a real Godot instance under Xvfb, and drive the automation harness end-to-end — no Windows dependency. Dockerfile adds (as root, before USER node): - X11 / Mesa software GL / audio runtime deps + python3 - .NET SDK 9.0 via upstream dot.net install script -> /usr/local/dotnet - Godot 4.6.2-stable mono Linux x86_64 -> /opt/godot/godot - /usr/local/bin/godot-xvfb wrapper: auto-wraps invocations in xvfb-run -a --server-args="-screen 0 1280x720x24 ..." harness.py picks GODOT_BIN from env, defaults to /opt/godot/godot on Linux, and auto-wraps the subprocess in xvfb-run when DISPLAY is unset. Windows code path unchanged. init-firewall.sh adds api.nuget.org to the allowlist so dotnet restore works post-boot. Godot + .NET SDK are fetched at image build time, before the firewall exists. New docs: - autonomous_plan.md: design rationale, alternatives considered - README.md: launch instructions for Windows terminal / Docker Desktop / VS Code Dev Containers / WSL2 natif - CLAUDE.md already documents the harness (done in previous commit) Validation: docker build succeeds; inside the container, dotnet --version =9.0.313, godot --version=4.6.2.stable.mono, dotnet test=102/102, python3 tools/automation/smoke.py passes end-to-end with 14 non-black 1280x720 PNGs. Mission 1 screenshot is visually identical to the Windows build, and Xvfb determinism is a bonus (det_a.png ≡ det_b.png bytewise).
2026-04-17 16:57:56 +02:00
def _default_godot_exe() -> Path:
    """Pick the Godot binary to launch.

    A non-empty ``GODOT_BIN`` environment variable always takes precedence.
    Without it, Linux falls back to ``/opt/godot/godot`` and every other
    platform to the hard-coded Windows console build path.
    """
    override = os.environ.get("GODOT_BIN")
    if override:
        return Path(override)

    linux_default = "/opt/godot/godot"
    windows_default = r"C:\Apps\godot\Godot_v4.6.2-stable_mono_win64_console.exe"
    is_linux = sys.platform.startswith("linux")
    return Path(linux_default if is_linux else windows_default)
# Resolved once at import time; changing GODOT_BIN afterwards does not affect
# this value. Used by Harness.__init__ when no godot_exe is passed.
_DEFAULT_GODOT = _default_godot_exe()
def _wrap_with_xvfb(argv: list[str]) -> list[str]:
    """Prefix *argv* with ``xvfb-run`` when running on a display-less Linux.

    The GL-compatibility renderer needs a framebuffer to draw into, so when
    the platform is Linux and ``DISPLAY`` is unset or empty, the command is
    wrapped in ``xvfb-run -a`` with a 1280x720x24 virtual screen. On any
    other platform, or when a display already exists, *argv* is returned
    unchanged.
    """
    needs_virtual_display = (
        sys.platform.startswith("linux") and not os.environ.get("DISPLAY")
    )
    if needs_virtual_display:
        server_args = (
            "--server-args=-screen 0 1280x720x24 -ac +extension GLX +render -noreset"
        )
        return ["xvfb-run", "-a", server_args] + list(argv)
    return argv
class HarnessError(RuntimeError):
    """Signals a harness failure: the game could not be started, exited
    unexpectedly, reported a failed command, or did not answer in time.
    """
class Harness:
    """Drives a running Chessistics build via file-based IPC.

    The game writes `<root>/ready.json` when the automation node is live,
    reads commands from `<root>/inbox/<id>.json`, and writes results to
    `<root>/outbox/<id>.json`. Screenshots land in `<root>/screens/`.

    Instances are context managers: leaving the `with` block calls `close()`.
    """

    def __init__(
        self,
        root: Path,
        godot_exe: Path | None = None,
        project_path: Path | None = None,
    ) -> None:
        """Record paths only; no process is started until `start()`.

        Args:
            root: Directory used for the IPC exchange (resolved to absolute).
            godot_exe: Godot binary to run; defaults to `_DEFAULT_GODOT`.
            project_path: Godot project directory; defaults to `_REPO_ROOT`.
        """
        self.root = Path(root).resolve()
        self.godot_exe = Path(godot_exe or _DEFAULT_GODOT)
        self.project_path = Path(project_path or _REPO_ROOT)
        self.inbox = self.root / "inbox"
        self.outbox = self.root / "outbox"
        self.screens = self.root / "screens"
        self.ready_file = self.root / "ready.json"
        self._proc: subprocess.Popen[bytes] | None = None
        # Monotonic counter that prefixes command ids (see send()).
        self._seq = 0

    # ---------------- lifecycle ----------------

    @classmethod
    def launch(
        cls,
        run_name: str | None = None,
        godot_exe: Path | None = None,
        project_path: Path | None = None,
        ready_timeout: float = 20.0,
    ) -> "Harness":
        """Create a harness under `_DEFAULT_RUNS/<run_name>` and start the game.

        Args:
            run_name: Sub-directory name; defaults to a `%Y%m%d_%H%M%S` timestamp.
            godot_exe: Forwarded to the constructor.
            project_path: Forwarded to the constructor.
            ready_timeout: Seconds to wait for the ready handshake.

        Returns:
            A started harness.

        Raises:
            HarnessError: If `start()` fails.
        """
        name = run_name or time.strftime("%Y%m%d_%H%M%S")
        root = _DEFAULT_RUNS / name
        h = cls(root=root, godot_exe=godot_exe, project_path=project_path)
        h.start(ready_timeout=ready_timeout)
        return h

    def start(self, ready_timeout: float = 20.0) -> None:
        """Spawn Godot and block until it writes `ready.json`.

        Args:
            ready_timeout: Seconds to wait for the ready handshake.

        Raises:
            HarnessError: If the executable or project path is missing, if
                Godot exits before becoming ready, or if the handshake times
                out. In the latter cases the child process is stopped before
                the error propagates.
        """
        # Prepare directories and wipe stale state.
        for d in (self.inbox, self.outbox, self.screens):
            d.mkdir(parents=True, exist_ok=True)
        self._clear_dir(self.inbox)
        self._clear_dir(self.outbox)
        self.ready_file.unlink(missing_ok=True)

        if not self.godot_exe.exists():
            raise HarnessError(f"Godot executable not found: {self.godot_exe}")
        if not self.project_path.exists():
            raise HarnessError(f"Project path not found: {self.project_path}")

        args = _wrap_with_xvfb([
            str(self.godot_exe),
            "--path", str(self.project_path),
            f"--automation={self.root}",
        ])
        print(f"[harness] launching: {' '.join(args)}", file=sys.stderr)
        # Inherit stdout/stderr so GD.Print output is visible.
        self._proc = subprocess.Popen(args)

        try:
            self._wait_until_ready(ready_timeout)
        except BaseException:
            # launch() never hands the instance back when start() raises, so
            # nobody could call close(): stop the child here instead of
            # leaking it (this also covers Ctrl-C during the wait).
            self._terminate(self._proc)
            self._proc = None
            raise

    def _wait_until_ready(self, ready_timeout: float) -> None:
        """Poll for a parseable `ready.json` until the deadline passes."""
        assert self._proc is not None
        # Monotonic clock: immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + ready_timeout
        while time.monotonic() < deadline:
            if self.ready_file.exists():
                try:
                    info = json.loads(self.ready_file.read_text(encoding="utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # File seen while still being written; retry next tick.
                    pass
                else:
                    print(f"[harness] ready: {info}", file=sys.stderr)
                    return
            if self._proc.poll() is not None:
                raise HarnessError(
                    f"Godot exited before ready (code={self._proc.returncode})."
                )
            time.sleep(0.1)
        raise HarnessError(f"Timed out waiting for ready.json after {ready_timeout}s.")

    def close(self, timeout: float = 5.0) -> None:
        """Shut the game down; safe to call more than once.

        Sends a `quit` command, waits up to *timeout* seconds for the process
        to exit, then escalates to `terminate()` and finally `kill()`.

        Args:
            timeout: Seconds to wait for a voluntary exit after `quit`.
        """
        if self._proc is None:
            return
        try:
            if self._proc.poll() is None:
                # Send quit command if still alive. Deliberately best-effort:
                # any failure here just means we fall through to terminate().
                try:
                    self.send("quit", timeout=2.0)
                except Exception:
                    pass
                try:
                    self._proc.wait(timeout=timeout)
                except subprocess.TimeoutExpired:
                    pass
            self._terminate(self._proc)
        finally:
            self._proc = None

    @staticmethod
    def _terminate(proc: subprocess.Popen[bytes]) -> None:
        """Stop *proc* if still running: terminate first, kill as a fallback."""
        if proc.poll() is not None:
            return
        proc.terminate()
        try:
            proc.wait(timeout=3.0)
        except subprocess.TimeoutExpired:
            # The process ignored terminate(); do not leave it running.
            proc.kill()
            proc.wait()

    def __enter__(self) -> "Harness":
        return self

    def __exit__(self, *_exc) -> None:
        self.close()

    # ---------------- low-level send ----------------

    def send(
        self,
        cmd: str,
        args: dict[str, Any] | None = None,
        timeout: float = 15.0,
    ) -> dict[str, Any]:
        """Write one command to the inbox and wait for its outbox reply.

        Args:
            cmd: Command name understood by the game's automation node.
            args: Command arguments; an empty dict is sent when omitted.
            timeout: Seconds to wait for the reply file.

        Returns:
            The reply's `result` value, or `{}` when it is missing or falsy.

        Raises:
            HarnessError: If the reply has a falsy `ok`, if Godot exits while
                the command is pending, or if no reply arrives in time.
        """
        self._seq += 1
        cmd_id = f"{self._seq:06d}-{uuid.uuid4().hex[:8]}"
        envelope = {"id": cmd_id, "cmd": cmd, "args": args or {}}
        inbox_path = self.inbox / f"{cmd_id}.json"
        outbox_path = self.outbox / f"{cmd_id}.json"

        # Write to a temp name and rename, so the game never observes a
        # half-written command file.
        tmp_path = inbox_path.with_suffix(".json.tmp")
        tmp_path.write_text(json.dumps(envelope), encoding="utf-8")
        os.replace(tmp_path, inbox_path)

        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if outbox_path.exists():
                try:
                    # Decode explicitly as UTF-8 rather than the locale
                    # encoding (cp1252 on Windows would garble non-ASCII).
                    # NOTE(review): assumes the game writes UTF-8 — confirm.
                    response = json.loads(outbox_path.read_text(encoding="utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Reply still being written; try again shortly.
                    time.sleep(0.05)
                    continue
                outbox_path.unlink(missing_ok=True)
                if not response.get("ok"):
                    raise HarnessError(
                        f"{cmd} failed: {response.get('error', response)}"
                    )
                return response.get("result") or {}
            if self._proc is not None and self._proc.poll() is not None:
                raise HarnessError(
                    f"Godot exited during {cmd} (code={self._proc.returncode})."
                )
            time.sleep(0.05)
        raise HarnessError(f"Timed out waiting for {cmd} result after {timeout}s.")

    # ---------------- convenience methods ----------------

    def screenshot(self, name: str) -> Path:
        """Send `screenshot` and return the reply's `abs_path` as a Path."""
        result = self.send("screenshot", {"name": name})
        return Path(result["abs_path"])

    def state(self) -> dict[str, Any]:
        """Send `get_state` and return its result."""
        return self.send("get_state")

    def select(self, kind: str) -> dict[str, Any]:
        """Send `select_piece` for the given piece kind."""
        return self.send("select_piece", {"kind": kind})

    def place(
        self,
        kind: str,
        start: tuple[int, int],
        end: tuple[int, int],
        level: int = 1,
    ) -> dict[str, Any]:
        """Send `place` with the kind, start/end pairs (as lists) and level."""
        return self.send("place", {
            "kind": kind,
            "start": list(start),
            "end": list(end),
            "level": level,
        })

    def click_cell(self, col: int, row: int, button: str = "left") -> dict[str, Any]:
        """Send `click_cell` for the given column, row and mouse button."""
        return self.send("click_cell", {"col": col, "row": row, "button": button})

    def key(self, key_name: str) -> dict[str, Any]:
        """Send `key` with the given key name."""
        return self.send("key", {"key": key_name})

    def play(self) -> dict[str, Any]:
        """Send `play`."""
        return self.send("play")

    def pause(self) -> dict[str, Any]:
        """Send `pause`."""
        return self.send("pause")

    def step(self) -> dict[str, Any]:
        """Send `step`, allowing 20 s for the reply."""
        return self.send("step", timeout=20.0)

    def wait_idle(self, timeout_ms: int = 10000) -> dict[str, Any]:
        """Send `wait_idle` with `timeoutMs` set to *timeout_ms*."""
        # Presumably the game may take up to timeout_ms to answer, so keep
        # the IPC wait longer than that; otherwise any timeout_ms above
        # send()'s 15 s default would be cut short on this side. The default
        # call still waits exactly 15 s.
        ipc_timeout = max(15.0, timeout_ms / 1000.0 + 5.0)
        return self.send("wait_idle", {"timeoutMs": timeout_ms}, timeout=ipc_timeout)

    def set_speed(self, interval: float) -> dict[str, Any]:
        """Send `set_speed` with the given interval."""
        return self.send("set_speed", {"interval": interval})

    def load_mission(self, campaign: str = "campaign_01", index: int = 0) -> dict[str, Any]:
        """Send `load_mission`, allowing 20 s for the reply."""
        return self.send("load_mission", {"campaign": campaign, "missionIndex": index}, timeout=20.0)

    def back_to_menu(self) -> dict[str, Any]:
        """Send `back_to_menu`."""
        return self.send("back_to_menu")

    def quick_save(self) -> dict[str, Any]:
        """Send `quick_save`."""
        return self.send("quick_save")

    def quick_load(self) -> dict[str, Any]:
        """Send `quick_load`."""
        return self.send("quick_load")

    def undo(self) -> dict[str, Any]:
        """Send `undo`."""
        return self.send("undo")

    def relocate(
        self,
        piece_id: int,
        new_start: tuple[int, int],
        new_end: tuple[int, int],
    ) -> dict[str, Any]:
        """Send `relocate` with the piece id and new start/end pairs (as lists)."""
        return self.send("relocate", {
            "pieceId": piece_id,
            "newStart": list(new_start),
            "newEnd": list(new_end),
        })

    def quit(self) -> dict[str, Any]:
        """Send `quit`, allowing 5 s for the reply."""
        return self.send("quit", timeout=5.0)

    # ---------------- private helpers ----------------

    @staticmethod
    def _clear_dir(p: Path) -> None:
        """Best-effort removal of every entry directly inside *p*.

        A missing directory is a no-op; entries that cannot be unlinked
        (e.g. sub-directories) are skipped silently.
        """
        if not p.exists():
            return
        for f in p.iterdir():
            try:
                f.unlink()
            except OSError:
                pass
@contextmanager
def launched(**kwargs):
    """Convenience context manager: `with launched() as h: ...`.

    All keyword arguments go straight to `Harness.launch`; the harness is
    closed when the `with` body finishes, whether or not it raised.
    """
    # Harness is itself a context manager whose exit calls close(), so
    # delegating to it gives the same guaranteed-close semantics.
    with Harness.launch(**kwargs) as session:
        yield session
if __name__ == "__main__":
    # Manual smoke check: start the game, report where the IPC root lives,
    # dump the current state as pretty-printed JSON, then shut down.
    with Harness.launch() as session:
        print("Ready.", session.root)
        snapshot = session.state()
        print("State:", json.dumps(snapshot, indent=2))