"""Integration: drive `apply_capability_change` against a real
container that mimics the agent's name + filesystem layout (PRD 0016).

The real `cli.py start` flow is too heavy for an integration
test (it builds the agent image, brings up all the sidecars, attaches
an interactive claude session). Instead, this test stages the
minimum the orchestrator interacts with:

  - A lightweight `alpine:latest` container running a long `sleep`,
    named `claude-bottle-<slug>` (matches the agent container name
    pattern) on the per-bottle internal network.
  - A marker file under `/home/node/.claude/` so we can assert the
    transcript snapshot path actually transferred bytes.

Then `apply_capability_change` runs and we verify:
  - Per-bottle Dockerfile written.
  - Containers + networks removed.
  - Transcript snapshot dir on the host has the marker file.

docker exec / cp / rm work across the docker socket boundary, so
this test runs in DinD too — no act_runner skip needed.
"""

from __future__ import annotations

import os
import shutil
import subprocess
import tempfile
import time
import unittest
from pathlib import Path

from claude_bottle import supervise
from claude_bottle.backend.docker import bottle_state, capability_apply
from claude_bottle.backend.docker.capability_apply import apply_capability_change
from claude_bottle.backend.docker.network import (
    network_create_egress,
    network_create_internal,
    network_remove,
)
from tests._docker import skip_unless_docker


ALPINE_IMAGE = "alpine:latest"


@skip_unless_docker()
class TestCapabilityApply(unittest.TestCase):
    """End-to-end checks of `apply_capability_change` against real docker objects."""

    @classmethod
    def setUpClass(cls):
        """Pull the alpine image once; skip the whole class if that fails."""
        r = subprocess.run(
            ["docker", "pull", ALPINE_IMAGE],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
        )
        if r.returncode != 0:
            raise unittest.SkipTest(f"could not pull {ALPINE_IMAGE}")

    def setUp(self):
        """Pick a per-test slug and redirect the claude-bottle root to a temp dir."""
        self.slug = f"cb-test-cap-{os.getpid()}-{int(time.time())}"
        self.agent_name = f"claude-bottle-{self.slug}"
        self.sidecar_names: list[str] = []
        self.internal_net = ""
        self.egress_net = ""
        # Fake home so tests don't touch ~/.claude-bottle/.
        self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-int.")
        self._original_root = supervise.claude_bottle_root

        def fake_root() -> Path:
            return Path(self._tmp.name) / ".claude-bottle"

        supervise.claude_bottle_root = fake_root  # type: ignore[assignment]

    def tearDown(self):
        """Restore the patched root and force-remove anything the test left behind."""
        supervise.claude_bottle_root = self._original_root  # type: ignore[assignment]
        # `docker rm -f` on a missing container just fails quietly (check=False),
        # so this is safe whether or not apply already removed everything.
        for name in [self.agent_name, *self.sidecar_names]:
            subprocess.run(
                ["docker", "rm", "-f", name],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
            )
        for n in (self.internal_net, self.egress_net):
            if n:
                network_remove(n)
        self._tmp.cleanup()

    def _mark_torn_down(self) -> None:
        """Record that apply already removed networks + sidecars.

        Clearing these fields makes tearDown skip the network removal and
        the sidecar `docker rm` for resources that no longer exist.
        """
        self.internal_net = ""
        self.egress_net = ""
        self.sidecar_names = []

    def _bring_up_fake_bottle(self) -> None:
        """Create the two networks, the fake agent container and a fake sidecar."""
        self.internal_net = network_create_internal(self.slug)
        self.egress_net = network_create_egress(self.slug)
        # Agent container with the canonical name.
        r = subprocess.run(
            [
                "docker", "run", "-d",
                "--name", self.agent_name,
                "--network", self.internal_net,
                ALPINE_IMAGE,
                "sh", "-c",
                "mkdir -p /home/node/.claude && "
                "echo 'transcript-marker' > /home/node/.claude/sessions.json && "
                "sleep 3600",
            ],
            capture_output=True, text=True, check=False,
        )
        self.assertEqual(0, r.returncode, r.stderr)
        # Also start a fake supervise sidecar so teardown has something
        # extra to clean up (mirrors a real bottle's container set).
        sidecar = f"claude-bottle-supervise-{self.slug}"
        # Register before running so tearDown removes it even if the run
        # fails half-way; avoid duplicates when a test brings the bottle
        # up more than once.
        if sidecar not in self.sidecar_names:
            self.sidecar_names.append(sidecar)
        r = subprocess.run(
            [
                "docker", "run", "-d",
                "--name", sidecar,
                "--network", self.internal_net,
                ALPINE_IMAGE, "sleep", "3600",
            ],
            capture_output=True, text=True, check=False,
        )
        # A sidecar that silently failed to start would make the
        # "sidecars gone" assertions vacuous.
        self.assertEqual(0, r.returncode, r.stderr)

    def _containers_named_like(self) -> list[str]:
        """All running/stopped containers whose names contain the
        bottle's slug — both agent + sidecars."""
        r = subprocess.run(
            [
                "docker", "ps", "-a",
                # Filter on the slug rather than the agent name: the sidecar
                # is `claude-bottle-supervise-<slug>`, which does not contain
                # `claude-bottle-<slug>` and would otherwise never be listed.
                "--filter", f"name={self.slug}",
                "--format", "{{.Names}}",
            ],
            capture_output=True, text=True, check=False,
        )
        return [line for line in (r.stdout or "").splitlines() if line]

    def _networks_named_like(self) -> list[str]:
        """All docker networks whose names match the bottle's slug."""
        r = subprocess.run(
            [
                "docker", "network", "ls",
                "--filter", f"name={self.slug}",
                "--format", "{{.Name}}",
            ],
            capture_output=True, text=True, check=False,
        )
        return [line for line in (r.stdout or "").splitlines() if line]

    def test_apply_writes_dockerfile_and_tears_down(self):
        self._bring_up_fake_bottle()
        self.assertIn(self.agent_name, self._containers_named_like())

        new_dockerfile = "FROM python:3.13\nRUN apk add ripgrep\n"
        before, after = apply_capability_change(self.slug, new_dockerfile)

        # Before is the repo Dockerfile (no prior per-bottle override);
        # after is what we passed in.
        self.assertIn("FROM ", before)
        self.assertEqual(new_dockerfile, after)

        # Per-bottle Dockerfile written on the host.
        self.assertEqual(
            new_dockerfile,
            bottle_state.per_bottle_dockerfile(self.slug),
        )

        # Agent + sidecars gone.
        self.assertEqual([], self._containers_named_like())
        # Networks removed (matching the slug substring).
        nets = self._networks_named_like()
        self.assertEqual([], nets)
        # Mark them as already cleaned so tearDown is idempotent.
        self._mark_torn_down()

    def test_transcript_snapshot_captured(self):
        self._bring_up_fake_bottle()
        apply_capability_change(self.slug, "FROM x\n")
        snap = bottle_state.transcript_snapshot_dir(self.slug)
        self.assertTrue(snap.is_dir(), f"transcript snapshot dir {snap} missing")
        # docker cp <container>:/home/node/.claude <snapshot> produces
        # <snapshot>/.claude/sessions.json (it preserves the source dir name
        # inside the destination if the destination already exists).
        # Walk the snapshot looking for the marker contents.
        marker_found = any(
            "transcript-marker" in path.read_text()
            for path in snap.rglob("sessions.json")
        )
        self.assertTrue(marker_found, f"marker not found under {snap}")
        # Cleaned up by apply already.
        self._mark_torn_down()

    def test_subsequent_apply_uses_per_bottle_dockerfile_for_before(self):
        # First change: before is repo's Dockerfile.
        self._bring_up_fake_bottle()
        first_before, _ = apply_capability_change(self.slug, "FROM v1\n")
        self.assertIn("FROM ", first_before)

        # Second change: before is "FROM v1\n" (the per-bottle override
        # from the first change), proving the state persists across
        # rebuilds.
        self._bring_up_fake_bottle()
        second_before, second_after = apply_capability_change(self.slug, "FROM v2\n")
        self.assertEqual("FROM v1\n", second_before)
        self.assertEqual("FROM v2\n", second_after)
        self._mark_torn_down()

    def test_teardown_idempotent_when_nothing_running(self):
        # No bottle ever brought up — teardown still doesn't raise.
        apply_capability_change(self.slug, "FROM x\n")
        self.assertEqual(
            "FROM x\n",
            bottle_state.per_bottle_dockerfile(self.slug),
        )


if __name__ == "__main__":
    unittest.main()