test(capability): integration test for apply_capability_change (PRD 0016)
Phase 4 of PRD 0016. End-to-end test against real Docker:

- Stages a fake bottle: an alpine:latest container named claude-bottle-<slug> with a marker file at /home/node/.claude/sessions.json, plus a fake supervise sidecar.
- Calls apply_capability_change with a new Dockerfile.
- Verifies: per-bottle Dockerfile written, agent + sidecars removed, networks removed, transcript snapshot dir on host contains the marker file (proving docker cp transferred bytes).
- Subsequent-apply test proves the per-bottle Dockerfile state persists across rebuilds (before-diff uses the prior override, not the repo Dockerfile).
- Teardown-idempotent test: apply against a never-started bottle doesn't raise.

docker exec / cp / rm / network rm work fine across the docker socket boundary, so this runs in DinD too — no act_runner skip needed.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,217 @@
|
||||
"""Integration: drive `apply_capability_change` against a real
|
||||
container that mimics the agent's name + filesystem layout (PRD 0016).
|
||||
|
||||
The real `cli.py start <agent>` flow is too heavy for an integration
|
||||
test (it builds the agent image, brings up all the sidecars, attaches
|
||||
an interactive claude session). Instead, this test stages the
|
||||
minimum the orchestrator interacts with:
|
||||
|
||||
- A lightweight `alpine:latest` container (kept alive with `sleep 3600`) named
|
||||
`claude-bottle-<slug>` (matches the agent container name pattern)
|
||||
on the per-bottle internal network.
|
||||
- A marker file under `/home/node/.claude/` so we can assert the
|
||||
transcript snapshot path actually transferred bytes.
|
||||
|
||||
Then `apply_capability_change` runs and we verify:
|
||||
- Per-bottle Dockerfile written.
|
||||
- Containers + networks removed.
|
||||
- Transcript snapshot dir on the host has the marker file.
|
||||
|
||||
docker exec / cp / rm work across the docker socket boundary, so
|
||||
this test runs in DinD too — no act_runner skip needed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from claude_bottle import supervise
|
||||
from claude_bottle.backend.docker import bottle_state, capability_apply
|
||||
from claude_bottle.backend.docker.capability_apply import apply_capability_change
|
||||
from claude_bottle.backend.docker.network import (
|
||||
network_create_egress,
|
||||
network_create_internal,
|
||||
network_remove,
|
||||
)
|
||||
from tests._docker import skip_unless_docker
|
||||
|
||||
|
||||
# Image used for the stand-in agent and sidecar containers; it is pulled
# once in setUpClass and the whole class is skipped if the pull fails.
ALPINE_IMAGE = "alpine:latest"
|
||||
|
||||
|
||||
@skip_unless_docker()
class TestCapabilityApply(unittest.TestCase):
    """Exercise `apply_capability_change` against stand-in Docker containers.

    Every test gets its own slug and a temporary directory that replaces
    `supervise.claude_bottle_root`, so nothing is written under the real
    `~/.claude-bottle/`. `tearDown` force-removes whatever containers and
    networks the test may have left behind.
    """

    # File the fake agent writes on start-up; the snapshot test looks for
    # its contents on the host after the apply.
    _MARKER_PATH = "/home/node/.claude/sessions.json"
    # Upper bound (seconds) on waiting for the fake agent to write the marker.
    _MARKER_TIMEOUT_S = 10.0

    @classmethod
    def setUpClass(cls):
        """Pull the base image once; skip the whole class if that fails."""
        r = subprocess.run(
            ["docker", "pull", ALPINE_IMAGE],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
        )
        if r.returncode != 0:
            raise unittest.SkipTest(f"could not pull {ALPINE_IMAGE}")

    def setUp(self):
        """Pick per-test names and redirect the bottle root to a temp dir."""
        self.slug = f"cb-test-cap-{os.getpid()}-{int(time.time())}"
        self.agent_name = f"claude-bottle-{self.slug}"
        self.sidecar_names: list[str] = []
        self.internal_net = ""
        self.egress_net = ""
        # Fake home so tests don't touch ~/.claude-bottle/.
        self._tmp = tempfile.TemporaryDirectory(prefix="cap-apply-int.")
        self._original_root = supervise.claude_bottle_root

        def fake_root() -> Path:
            return Path(self._tmp.name) / ".claude-bottle"

        supervise.claude_bottle_root = fake_root  # type: ignore[assignment]

    def tearDown(self):
        """Restore the patched root and best-effort remove Docker resources."""
        supervise.claude_bottle_root = self._original_root  # type: ignore[assignment]
        try:
            # `docker rm -f` on an already-removed container just exits
            # non-zero; output is discarded and the result ignored.
            for name in [self.agent_name, *self.sidecar_names]:
                subprocess.run(
                    ["docker", "rm", "-f", name],
                    stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
                )
            for n in (self.internal_net, self.egress_net):
                if n:
                    network_remove(n)
        finally:
            # Always drop the temp dir, even if network removal raised.
            self._tmp.cleanup()

    def _bring_up_fake_bottle(self) -> None:
        """Start the stand-in agent and supervise sidecar on fresh networks.

        Returns only once the agent's marker file exists, so a following
        `apply_capability_change` cannot race the container's start-up shell.
        """
        self.internal_net = network_create_internal(self.slug)
        self.egress_net = network_create_egress(self.slug)
        # Agent container with the canonical name.
        r = subprocess.run(
            [
                "docker", "run", "-d",
                "--name", self.agent_name,
                "--network", self.internal_net,
                ALPINE_IMAGE,
                "sh", "-c",
                "mkdir -p /home/node/.claude && "
                "echo 'transcript-marker' > /home/node/.claude/sessions.json && "
                "sleep 3600",
            ],
            capture_output=True, text=True, check=False,
        )
        self.assertEqual(0, r.returncode, r.stderr)
        # Also start a fake supervise sidecar so teardown has something
        # extra to clean up (mirrors a real bottle's container set).
        sidecar = f"claude-bottle-supervise-{self.slug}"
        # Register before running so tearDown removes it even when
        # `docker run` fails part-way; skip duplicates on repeated bring-up.
        if sidecar not in self.sidecar_names:
            self.sidecar_names.append(sidecar)
        r = subprocess.run(
            [
                "docker", "run", "-d",
                "--name", sidecar,
                "--network", self.internal_net,
                ALPINE_IMAGE, "sleep", "3600",
            ],
            capture_output=True, text=True, check=False,
        )
        self.assertEqual(0, r.returncode, r.stderr)
        self._wait_for_marker()

    def _wait_for_marker(self) -> None:
        """Block until the agent container has written a non-empty marker.

        `docker run -d` returns once the container has started, not once
        its shell command has made progress, so poll for the file.
        """
        deadline = time.monotonic() + self._MARKER_TIMEOUT_S
        while True:
            probe = subprocess.run(
                [
                    "docker", "exec", self.agent_name,
                    "sh", "-c", f"test -s {self._MARKER_PATH}",
                ],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
            )
            if probe.returncode == 0:
                return
            if time.monotonic() >= deadline:
                self.fail(
                    f"marker {self._MARKER_PATH} never appeared in {self.agent_name}"
                )
            time.sleep(0.1)

    def _docker_names(self, *args: str) -> list[str]:
        """Run a `docker ...` listing command and return its non-empty lines.

        Fails the test when the command itself fails, so a broken listing
        is never mistaken for "nothing exists".
        """
        r = subprocess.run(
            ["docker", *args],
            capture_output=True, text=True, check=False,
        )
        self.assertEqual(0, r.returncode, r.stderr)
        return [line for line in (r.stdout or "").splitlines() if line]

    def _containers_named_like(self) -> list[str]:
        """All running/stopped containers whose names contain the bottle's
        slug — both agent + sidecars."""
        # Filter on the slug, not the agent name: the sidecar is named
        # `claude-bottle-supervise-<slug>`, which does not contain
        # `claude-bottle-<slug>` and would otherwise never be listed.
        return self._docker_names(
            "ps", "-a",
            "--filter", f"name={self.slug}",
            "--format", "{{.Names}}",
        )

    def _networks_named_like(self) -> list[str]:
        """All Docker networks whose names contain the bottle's slug."""
        return self._docker_names(
            "network", "ls",
            "--filter", f"name={self.slug}",
            "--format", "{{.Name}}",
        )

    def _mark_networks_removed(self) -> None:
        """Record that the networks are already gone, so tearDown does not
        try to remove them a second time.

        Container names are deliberately kept: tearDown's `docker rm -f` is
        harmless for missing containers and prevents a leak if one survived.
        """
        self.internal_net = ""
        self.egress_net = ""

    def test_apply_writes_dockerfile_and_tears_down(self):
        self._bring_up_fake_bottle()
        running = self._containers_named_like()
        self.assertIn(self.agent_name, running)
        for sidecar in self.sidecar_names:
            self.assertIn(sidecar, running)

        new_dockerfile = "FROM python:3.13\nRUN apk add ripgrep\n"
        before, after = apply_capability_change(self.slug, new_dockerfile)

        # Before is the repo Dockerfile (no prior per-bottle override);
        # after is what we passed in.
        self.assertIn("FROM ", before)
        self.assertEqual(new_dockerfile, after)

        # Per-bottle Dockerfile written on the host.
        self.assertEqual(
            new_dockerfile,
            bottle_state.per_bottle_dockerfile(self.slug),
        )

        # Agent + sidecars gone.
        self.assertEqual([], self._containers_named_like())
        # Networks removed (matching the slug substring).
        nets = self._networks_named_like()
        self.assertEqual([], nets)
        # Mark them as already cleaned so tearDown is idempotent.
        self._mark_networks_removed()

    def test_transcript_snapshot_captured(self):
        self._bring_up_fake_bottle()
        apply_capability_change(self.slug, "FROM x\n")
        snap = bottle_state.transcript_snapshot_dir(self.slug)
        self.assertTrue(snap.is_dir(), f"transcript snapshot dir {snap} missing")
        # docker cp <container>:/home/node/.claude <dst> produces
        # <dst>/.claude/sessions.json (it preserves the source dir name
        # inside the destination if the destination already exists).
        # Walk the snapshot looking for the marker contents.
        marker_found = any(
            "transcript-marker" in path.read_text()
            for path in snap.rglob("sessions.json")
        )
        self.assertTrue(marker_found, f"marker not found under {snap}")
        # Cleaned up by apply already.
        self._mark_networks_removed()

    def test_subsequent_apply_uses_per_bottle_dockerfile_for_before(self):
        # First change: before is repo's Dockerfile.
        self._bring_up_fake_bottle()
        first_before, _ = apply_capability_change(self.slug, "FROM v1\n")
        self.assertIn("FROM ", first_before)

        # Second change: before is "FROM v1\n" (the per-bottle override
        # from the first change), proving the state persists across
        # rebuilds.
        self._bring_up_fake_bottle()
        second_before, second_after = apply_capability_change(self.slug, "FROM v2\n")
        self.assertEqual("FROM v1\n", second_before)
        self.assertEqual("FROM v2\n", second_after)
        self._mark_networks_removed()

    def test_teardown_idempotent_when_nothing_running(self):
        # No bottle ever brought up — teardown still doesn't raise.
        apply_capability_change(self.slug, "FROM x\n")
        self.assertEqual(
            "FROM x\n",
            bottle_state.per_bottle_dockerfile(self.slug),
        )
|
||||
|
||||
|
||||
# Allow running this module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user