# Name of the environment variable that lets the operator pin the DNS server
# used for Apple Container image builds and for the builder itself.
_DNS_OVERRIDE_ENV = "BOT_BOTTLE_MACOS_CONTAINER_DNS"


def _dns_override() -> str:
    """Return the operator-supplied DNS override, or "" when unset or blank."""
    return os.environ.get(_DNS_OVERRIDE_ENV, "").strip()


def dns_server() -> str:
    """Return the DNS server to hand to Apple Container.

    Resolution order:

    1. the ``BOT_BOTTLE_MACOS_CONTAINER_DNS`` environment variable
       (whitespace-stripped, used verbatim when non-empty);
    2. the first usable IPv4 nameserver reported by ``scutil --dns``;
    3. the module-level ``_DEFAULT_DNS`` fallback.
    """
    return _dns_override() or _host_ipv4_dns() or _DEFAULT_DNS


def build_image(ref: str, context: str, *, dockerfile: str = "") -> None:
    """Build an OCI image with Apple's BuildKit-backed `container build`.

    Args:
        ref: Image reference passed to ``-t``.
        context: Build context directory (last positional argument).
        dockerfile: Optional Dockerfile path passed to ``-f`` when non-empty.

    Raises:
        subprocess.CalledProcessError: If the builder restart or the build
            itself exits non-zero.
    """
    info(
        f"building image {ref} from {context} with Apple Container "
        "(layer cache keeps repeat builds fast)"
    )
    # Resolve the DNS server exactly once so the builder and the build are
    # guaranteed to agree, and so `scutil --dns` is not spawned twice.
    dns = dns_server()
    _ensure_builder_dns(dns)
    args = [_CONTAINER, "build", "-t", ref, "--dns", dns]
    if dockerfile:
        args.extend(["-f", dockerfile])
    args.append(context)
    subprocess.run(args, check=True)


def _ensure_builder_dns(dns: str = "") -> None:
    """Make sure a builder is running and able to resolve build hosts.

    The builder is (re)started with ``dns`` when any of these holds:

    * no builder entry reports the ``running`` state;
    * the operator set an explicit override and the running builder's
      configured nameservers do not include ``dns``;
    * the in-builder resolution probe fails.

    A running builder whose resolver works is otherwise left untouched.

    Args:
        dns: DNS server to use for a restart. Empty means "resolve it via
            ``dns_server()``", which keeps the zero-argument call working.
    """
    dns = dns or dns_server()
    status = _builder_status()
    # Checks are ordered cheapest-first; `or` short-circuits, so the
    # `container exec` probe only runs when nothing else forces a restart.
    needs_restart = (
        not _builder_running(status)
        or bool(_dns_override() and not _builder_has_dns(status, dns))
        or not _builder_resolves_build_hosts()
    )
    if needs_restart:
        _restart_builder_with_dns(dns)


def _restart_builder_with_dns(dns: str) -> None:
    """Stop the builder (best effort) and start it again with ``--dns dns``.

    Raises:
        subprocess.CalledProcessError: If ``builder start`` exits non-zero.
    """
    # Stopping is best-effort: a builder that is not running is not an error.
    subprocess.run(
        [_CONTAINER, "builder", "stop"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    subprocess.run(
        [_CONTAINER, "builder", "start", "--dns", dns],
        check=True,
    )


def _host_ipv4_dns() -> str:
    """Return a usable host IPv4 nameserver from ``scutil --dns``, or "".

    Returns "" when not on macOS, when ``scutil`` cannot be spawned, when it
    exits non-zero, or when no acceptable IPv4 nameserver is listed.
    """
    if not is_macos():
        return ""
    try:
        result = subprocess.run(
            ["scutil", "--dns"],
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError:
        # scutil missing or not executable: let the caller fall back to the
        # default resolver instead of aborting the build.
        return ""
    if result.returncode != 0:
        return ""
    return _parse_scutil_ipv4_dns(result.stdout)


def _parse_scutil_ipv4_dns(output: str) -> str:
    """Pick an IPv4 nameserver out of ``scutil --dns`` text.

    The text is split into blocks at lines starting with ``resolver #``.
    Blocks mentioning ``Directly Reachable Address`` are searched first; if
    none yields a usable address, every block is searched. Within a block the
    first ``nameserver[...]`` value accepted by ``_usable_ipv4`` wins.

    Returns:
        The chosen address, or "" when nothing qualifies.
    """
    blocks: list[list[str]] = []
    current: list[str] = []
    for line in output.splitlines():
        if line.startswith("resolver #") and current:
            blocks.append(current)
            current = []
        current.append(line)
    if current:
        blocks.append(current)

    for direct_only in (True, False):
        for block in blocks:
            if direct_only and not any(
                "Directly Reachable Address" in line for line in block
            ):
                continue
            for line in block:
                if "nameserver[" not in line or ":" not in line:
                    continue
                # Split on the first colon only; IPv6 values keep their own
                # colons and are then rejected by _usable_ipv4.
                candidate = line.split(":", 1)[1].strip()
                if _usable_ipv4(candidate):
                    return candidate
    return ""


def _usable_ipv4(value: str) -> bool:
    """Return True for an IPv4 address that is not loopback, link-local,
    multicast, or unspecified; False for anything else, including text that
    does not parse as an IP address."""
    try:
        address = ipaddress.ip_address(value)
    except ValueError:
        return False
    return (
        address.version == 4
        and not address.is_loopback
        and not address.is_link_local
        and not address.is_multicast
        and not address.is_unspecified
    )


def _builder_status() -> list[dict[str, object]]:
    """Return the entries of ``container builder status --format json``.

    Returns an empty list when the command fails or prints invalid JSON. A
    top-level JSON object is wrapped in a one-element list; non-dict list
    items are dropped.
    """
    result = subprocess.run(
        [_CONTAINER, "builder", "status", "--format", "json"],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        return []
    try:
        data = json.loads(result.stdout or "[]")
    except json.JSONDecodeError:
        return []
    if isinstance(data, list):
        return [entry for entry in data if isinstance(entry, dict)]
    if isinstance(data, dict):
        return [data]
    return []


def _builder_running(status: list[dict[str, object]]) -> bool:
    """Return True when any entry has ``status.state == "running"``."""
    return any(
        isinstance(entry.get("status"), dict)
        and entry["status"].get("state") == "running"
        for entry in status
    )


def _builder_dns_nameservers(status: list[dict[str, object]]) -> list[str]:
    """Collect the string values under ``configuration.dns.nameservers``.

    Entries whose shape does not match (missing keys, non-dict levels, a
    non-list ``nameservers``) are skipped, as are non-string list items.
    """
    out: list[str] = []
    for entry in status:
        config = entry.get("configuration")
        config_dns = config.get("dns") if isinstance(config, dict) else None
        nameservers = (
            config_dns.get("nameservers")
            if isinstance(config_dns, dict)
            else None
        )
        if not isinstance(nameservers, list):
            continue
        out.extend(name for name in nameservers if isinstance(name, str))
    return out


def _builder_has_dns(status: list[dict[str, object]], dns: str) -> bool:
    """Return True when ``dns`` is among the builder's configured nameservers."""
    return dns in _builder_dns_nameservers(status)


def _builder_resolves_build_hosts(host: str = "deb.debian.org") -> bool:
    """Return True when ``getent hosts <host>`` succeeds in the builder.

    The probe runs ``container exec buildkit getent hosts <host>`` and only
    looks at the exit status; output is discarded.

    Args:
        host: Hostname to resolve. Defaults to the Debian package mirror.
    """
    result = subprocess.run(
        [_CONTAINER, "exec", "buildkit", "getent", "hosts", host],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    return result.returncode == 0
Follow-up slices make launch runnable with the proven +two-network sidecar topology and add real-runtime coverage, without weakening bot-bottle's sidecar egress model. ## Problem @@ -49,10 +49,15 @@ path around the egress sidecar. - The backend has tested wrappers for Apple Container image build, image inspection, container `exec`, container `cp`, cleanup, and active-agent enumeration. -- Full launch fails loudly with an operator-facing message until the - sidecar network enforcement design is implemented. -- The PRD records the remaining launch work so the next PR can make the - backend runnable without revisiting registration or wrapper plumbing. +- Full launch uses a host-only internal network for the agent and a + separate NAT egress network for the sidecar bundle. +- The agent container does not attach to the egress network. It reaches + allowed outbound hosts through HTTP(S)_PROXY pointing at the + sidecar's internal-network IP. +- `bottle.git` / git-gate bottles fail loudly on this backend until a + safe Apple Container key-delivery path exists. +- Real-runtime integration coverage is present and guarded by macOS and + Apple Container availability. ## Non-goals @@ -101,25 +106,38 @@ The bottle handle mirrors `DockerBottle`: it builds a host argv for foreground agent execution, pipes shell snippets through stdin for `Bottle.exec`, and exposes `cp_in` for provisioning. -### Launch guard +### Launch topology -`launch()` is intentionally not enabled in the first slice. It exits -with a fatal message explaining that sidecar network enforcement still -needs implementation. +`launch()` uses Apple Container's two-network topology: -This is deliberate. A runnable backend that places the agent on a -normal outbound network while relying on environment variables for -proxying would violate bot-bottle's egress model. 
The runnable version -must prove one of these shapes: +- create a host-only internal network for the bottle; +- create a normal NAT egress network for the sidecar bundle; +- start the sidecar bundle attached to the egress network first and the + internal network second; +- discover the sidecar's internal-network IPv4 address from + `container inspect`; +- start the agent attached only to the internal network, with + HTTP_PROXY / HTTPS_PROXY / lowercase proxy vars pointing at the + sidecar IP and egress port. -- Apple Container supports the equivalent of Docker's two-network - sidecar topology: agent on an internal-only network, sidecar on both - internal and egress networks. -- The sidecar bundle runs as a separate VM/container with published - loopback ports, and the agent runtime can be constrained to only - reach that per-bottle loopback alias. -- Apple Container init/network hooks can enforce the egress sidecar as - the only outbound path before the agent process starts. +This keeps the agent off the outbound network while preserving the +proxy-env contract that existing agent tooling already honors. The +integration smoke also removes the proxy env in-guest and confirms +direct egress fails. + +### Deferred git-gate support + +Apple Container currently rejects single-file bind mounts, and +`container cp` into a stopped container is not available. Starting the +container earlier would allow `container cp` into a running container, +but it would also mean delivering SSH private key material into a live +sidecar before the git-gate daemon is ready to own it. Mounting broad +host SSH directories is not acceptable. + +For this PRD, `bottle.git` / git-gate support is explicitly deferred on +the `macos-container` backend. Bottles with git-gate upstreams fail +loudly and should use `docker` or `smolmachines` until a narrower key +delivery design lands. 
## Implementation chunks @@ -147,8 +165,19 @@ must prove one of these shapes: - Unit tests cover `MacosContainerBottle` command construction and stdin-based shell execution. - Unit tests cover cleanup and active enumeration parsing. -- Future integration tests must run on a host with Apple Container - installed and should verify egress cannot bypass the sidecar. +- Unit tests cover launch argv/env construction, sidecar mount + staging, sidecar IP parsing, and git-gate rejection. +- Integration tests run on macOS hosts with Apple Container installed + and verify that egress cannot bypass the sidecar. They also preflight + Apple Container BuildKit DNS because image builds must resolve + package mirrors before a launch smoke can be meaningful. The backend + probes the running builder before image builds and leaves it alone + when its current resolver works. If the probe fails, or if the + operator explicitly sets `BOT_BOTTLE_MACOS_CONTAINER_DNS`, the backend + restarts the Apple Container builder with the configured DNS server. + Without an explicit override, that server is discovered from the + host's directly reachable IPv4 resolver before falling back to a + public resolver. ## References diff --git a/tests/integration/test_macos_container_launch.py b/tests/integration/test_macos_container_launch.py new file mode 100644 index 0000000..9ce384f --- /dev/null +++ b/tests/integration/test_macos_container_launch.py @@ -0,0 +1,239 @@ +"""Integration: macOS Container launch topology. + +End-to-end against Apple's real `container` runtime. 
The smoke launches +a bottle with the experimental macOS Container backend and verifies the +properties that make the explicit-proxy launch acceptable: + + - the agent can exec commands after provisioning; + - HTTP(S)_PROXY points at the sidecar's internal-network IP; + - allowlisted HTTPS reaches the egress sidecar; + - direct egress with proxy env removed fails from the internal-only + agent network; + - non-allowlisted proxy traffic is blocked. + +Skipped under Gitea Actions and on hosts without Apple's `container`. +""" + +from __future__ import annotations + +import os +import platform +import shutil +import subprocess +import tempfile +import unittest +from pathlib import Path + +from bot_bottle.backend import BottleSpec, get_bottle_backend +from bot_bottle.backend.macos_container.util import ( + dns_server as _container_dns_server, + is_available as _container_available, +) +from bot_bottle.manifest import Manifest + + +_AGENT_PROMPT = "You are a launch smoke-test agent. Be brief." 
def _minimal_agent_dockerfile(path: Path) -> None:
    """Write a small node-based agent Dockerfile (curl, git, CA certs) to ``path``."""
    path.write_text(
        "\n".join((
            "FROM node:22-slim",
            "RUN apt-get update \\",
            " && apt-get install -y --no-install-recommends \\",
            " ca-certificates curl git \\",
            " && rm -rf /var/lib/apt/lists/*",
            "USER node",
            "WORKDIR /home/node",
            "CMD [\"sleep\", \"infinity\"]",
            "",
        )),
        encoding="utf-8",
    )


def _minimal_manifest(dockerfile: Path) -> Manifest:
    """Build a one-bottle, one-agent manifest that allowlists only example.com."""
    return Manifest.from_json_obj({
        "bottles": {
            "dev": {
                "agent_provider": {
                    "template": "pi",
                    "dockerfile": str(dockerfile),
                    "settings": {
                        "provider": "example",
                        "base_url": "https://example.com/v1",
                        "models": ["smoke"],
                    },
                },
                "egress": {
                    "routes": [
                        {"host": "example.com"},
                    ],
                },
            },
        },
        "agents": {
            "demo": {
                "skills": [],
                "prompt": _AGENT_PROMPT,
                "bottle": "dev",
            },
        },
    })


def _buildkit_dns_available() -> bool:
    """Return True when a throwaway `container build` can resolve deb.debian.org.

    Runs a real image build, so it is slow and has side effects; the
    temporary image and staging directory are removed afterwards. Returns
    False immediately off macOS or when Apple Container is unavailable.
    """
    if platform.system() != "Darwin" or not _container_available():
        return False
    stage = Path(tempfile.mkdtemp(prefix="cb-container-buildkit-dns."))
    image = "bot-bottle-buildkit-dns-check:latest"
    try:
        dockerfile = stage / "Dockerfile"
        dockerfile.write_text(
            "FROM debian:bookworm-slim\n"
            "RUN getent hosts deb.debian.org\n",
            encoding="utf-8",
        )
        result = subprocess.run(
            [
                "container", "build",
                "--dns", _container_dns_server(),
                "-t", image,
                "-f", str(dockerfile),
                str(stage),
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        return result.returncode == 0
    finally:
        subprocess.run(
            ["container", "image", "delete", image],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        shutil.rmtree(stage, ignore_errors=True)


@unittest.skipIf(
    os.environ.get("GITEA_ACTIONS") == "true",
    "skipped under act_runner: cannot host Apple Container VMs",
)
@unittest.skipUnless(
    platform.system() == "Darwin",
    "Apple Container is macOS-only",
)
@unittest.skipUnless(
    _container_available(),
    "Apple Container not on PATH; install from "
    "https://github.com/apple/container/releases",
)
class TestMacosContainerLaunch(unittest.TestCase):
    """Launch once and reuse the bottle across probes."""

    # Class-level defaults so cleanup code can always read these attributes.
    _launch = None
    bottle = None
    _previous_backend = None

    @classmethod
    def setUpClass(cls) -> None:
        # The BuildKit DNS preflight performs a real `container build`.
        # Decorator arguments are evaluated at import time, so running it
        # there would trigger a build during test collection even when a
        # cheaper guard above skips the class. Run it here instead.
        if not _buildkit_dns_available():
            raise unittest.SkipTest(
                "Apple Container BuildKit cannot resolve deb.debian.org "
                "on this host"
            )
        cls.stage = Path(tempfile.mkdtemp(prefix="cb-macos-container-launch."))
        cls._launch = None
        cls.bottle = None
        dockerfile = cls.stage / "Dockerfile.agent-smoke"
        _minimal_agent_dockerfile(dockerfile)
        # Remember any pre-existing selection so it can be restored later.
        cls._previous_backend = os.environ.get("BOT_BOTTLE_BACKEND")
        os.environ["BOT_BOTTLE_BACKEND"] = "macos-container"
        try:
            backend = get_bottle_backend()
            spec = BottleSpec(
                manifest=_minimal_manifest(dockerfile),
                agent_name="demo",
                copy_cwd=False,
                user_cwd=str(cls.stage),
            )
            cls.plan = backend.prepare(spec, stage_dir=cls.stage)
            cls._launch = backend.launch(cls.plan)
            cls.bottle = cls._launch.__enter__()
        except BaseException:
            # tearDownClass does not run when setUpClass fails, so clean up
            # here before re-raising.
            if cls._launch is not None:
                cls._launch.__exit__(None, None, None)
            shutil.rmtree(cls.stage, ignore_errors=True)
            cls._restore_backend_env()
            raise

    @classmethod
    def tearDownClass(cls) -> None:
        try:
            if cls._launch is not None:
                cls._launch.__exit__(None, None, None)
        finally:
            shutil.rmtree(cls.stage, ignore_errors=True)
            cls._restore_backend_env()

    @classmethod
    def _restore_backend_env(cls) -> None:
        """Put BOT_BOTTLE_BACKEND back to its value from before setUpClass."""
        if cls._previous_backend is None:
            os.environ.pop("BOT_BOTTLE_BACKEND", None)
        else:
            os.environ["BOT_BOTTLE_BACKEND"] = cls._previous_backend

    def test_smoke_exec_echo(self):
        """The agent accepts exec'd commands after provisioning."""
        r = self.bottle.exec(  # type: ignore[union-attr]
            "echo hello-from-macos-container"
        )
        self.assertEqual(0, r.returncode, msg=r.stderr)
        self.assertIn("hello-from-macos-container", r.stdout)

    def test_proxy_env_points_at_sidecar_internal_ip(self):
        """Proxy env vars name a non-loopback IPv4 on port 9099."""
        r = self.bottle.exec(  # type: ignore[union-attr]
            "printf '%s\n' \"$HTTPS_PROXY\" \"$HTTP_PROXY\" "
            "\"$NO_PROXY\" \"$NODE_EXTRA_CA_CERTS\""
        )
        self.assertEqual(0, r.returncode, msg=r.stderr)
        values = [line.strip() for line in r.stdout.splitlines()]
        self.assertEqual(4, len(values), values)
        self.assertEqual(values[0], values[1], values)
        self.assertRegex(values[0], r"^http://[0-9.]+:9099$")
        self.assertNotIn("127.0.0.1", values[0])
        sidecar_host = values[0].removeprefix("http://").removesuffix(":9099")
        self.assertIn(sidecar_host, values[2])
        self.assertEqual(
            "/usr/local/share/ca-certificates/bot-bottle-mitm-ca.crt",
            values[3],
        )

    def test_allowlisted_https_reaches_egress_proxy(self):
        """An allowlisted host is reachable with the proxy env in place."""
        r = self.bottle.exec(  # type: ignore[union-attr]
            "curl -fsS --max-time 20 https://example.com >/dev/null && echo OK"
        )
        self.assertEqual(0, r.returncode, msg=r.stderr + r.stdout)
        self.assertIn("OK", r.stdout)

    def test_direct_egress_bypass_without_proxy_fails(self):
        """With the proxy env removed, curl reports a connection failure."""
        r = self.bottle.exec(  # type: ignore[union-attr]
            "env -u HTTPS_PROXY -u HTTP_PROXY -u https_proxy -u http_proxy "
            "curl -s --show-error --max-time 5 https://example.com 2>&1 || true"
        )
        output = r.stdout.lower()
        failure_markers = (
            "refused",
            "timed out",
            "unreachable",
            "failed",
            "could not resolve",
            "connection reset",
        )
        self.assertTrue(
            any(marker in output for marker in failure_markers),
            f"expected direct egress to fail; got: {r.stdout!r}",
        )

    def test_non_allowlisted_host_fails_through_proxy(self):
        """A host outside egress.routes is rejected when going via the proxy."""
        r = self.bottle.exec(  # type: ignore[union-attr]
            "curl -s --show-error --max-time 10 https://iana.org 2>&1 || true"
        )
        output = r.stdout.lower()
        # Status codes are digits, so matching on the lowercased text is
        # equivalent to matching on the raw output.
        rejection_markers = (
            "403",
            "502",
            "blocked",
            "not allowed",
            "not in the bottle's egress.routes allowlist",
            "forbidden",
            "failed",
        )
        self.assertTrue(
            any(marker in output for marker in rejection_markers),
            f"expected non-allowlisted proxy request to fail; got: {r.stdout!r}",
        )


if __name__ == "__main__":
    unittest.main()
b/tests/unit/test_macos_container_util.py @@ -28,17 +28,137 @@ class TestMacosContainerAvailability(unittest.TestCase): class TestMacosContainerCommands(unittest.TestCase): + def test_dns_server_prefers_direct_host_ipv4_resolver(self): + scutil = util.subprocess.CompletedProcess( + args=[], + returncode=0, + stdout=""" +resolver #1 + nameserver[0] : 100.100.100.100 + reach : 0x00000003 (Reachable,Transient Connection) + +resolver #2 + nameserver[0] : 2600:4041:5c43:b900::1 + nameserver[1] : 192.168.1.1 + reach : 0x00020002 (Reachable,Directly Reachable Address) +""", + stderr="", + ) + with patch.object(util.os, "environ", {}), \ + patch.object(util.platform, "system", return_value="Darwin"), \ + patch.object(util.subprocess, "run", return_value=scutil): + self.assertEqual("192.168.1.1", util.dns_server()) + def test_build_image(self): - with patch.object(util.subprocess, "run") as run: + status = util.subprocess.CompletedProcess( + args=[], + returncode=0, + stdout=( + '[{"status":{"state":"running"},' + '"configuration":{"dns":{"nameservers":["9.9.9.9"]}}}]' + ), + stderr="", + ) + with patch.object(util.subprocess, "run", return_value=status) as run, \ + patch.object(util.os, "environ", { + "BOT_BOTTLE_MACOS_CONTAINER_DNS": "9.9.9.9", + }): util.build_image("bot-bottle-agent:latest", "/repo", dockerfile="/repo/Dockerfile") self.assertEqual( [ "container", "build", "-t", "bot-bottle-agent:latest", - "-f", "/repo/Dockerfile", "/repo", + "--dns", "9.9.9.9", "-f", "/repo/Dockerfile", "/repo", ], - run.call_args.args[0], + run.call_args_list[-1].args[0], + ) + self.assertTrue(run.call_args_list[-1].kwargs["check"]) + + def test_build_image_restarts_builder_when_dns_mismatches(self): + status = util.subprocess.CompletedProcess( + args=[], + returncode=0, + stdout=( + '[{"status":{"state":"running"},' + '"configuration":{"dns":{"nameservers":[]}}}]' + ), + stderr="", + ) + with patch.object(util.subprocess, "run", return_value=status) as run, \ + patch.object(util.os, 
"environ", { + "BOT_BOTTLE_MACOS_CONTAINER_DNS": "9.9.9.9", + }): + util.build_image("bot-bottle-agent:latest", "/repo") + calls = [c.args[0] for c in run.call_args_list] + self.assertIn(["container", "builder", "stop"], calls) + self.assertIn( + ["container", "builder", "start", "--dns", "9.9.9.9"], + calls, + ) + self.assertEqual( + [ + "container", "build", "-t", "bot-bottle-agent:latest", + "--dns", "9.9.9.9", "/repo", + ], + calls[-1], + ) + + def test_build_image_leaves_working_builder_with_different_dns_alone(self): + status = util.subprocess.CompletedProcess( + args=[], + returncode=0, + stdout=( + '[{"status":{"state":"running"},' + '"configuration":{"dns":{"nameservers":["8.8.8.8"]}}}]' + ), + stderr="", + ) + probe = util.subprocess.CompletedProcess( + args=[], returncode=0, stdout="", stderr="", + ) + build = util.subprocess.CompletedProcess( + args=[], returncode=0, stdout="", stderr="", + ) + with patch.object(util, "dns_server", return_value="192.168.1.1"), \ + patch.object(util.os, "environ", {}), \ + patch.object(util.subprocess, "run", side_effect=[status, probe, build]) as run: + util.build_image("bot-bottle-agent:latest", "/repo") + calls = [c.args[0] for c in run.call_args_list] + self.assertNotIn(["container", "builder", "stop"], calls) + self.assertNotIn( + ["container", "builder", "start", "--dns", "192.168.1.1"], + calls, + ) + + def test_build_image_restarts_builder_when_dns_probe_fails(self): + status = util.subprocess.CompletedProcess( + args=[], + returncode=0, + stdout=( + '[{"status":{"state":"running"},' + '"configuration":{"dns":{"nameservers":["8.8.8.8"]}}}]' + ), + stderr="", + ) + failed_probe = util.subprocess.CompletedProcess( + args=[], returncode=2, stdout="", stderr="", + ) + ok = util.subprocess.CompletedProcess( + args=[], returncode=0, stdout="", stderr="", + ) + with patch.object(util, "dns_server", return_value="192.168.1.1"), \ + patch.object(util.os, "environ", {}), \ + patch.object( + util.subprocess, + "run", + 
side_effect=[status, failed_probe, ok, ok, ok], + ) as run: + util.build_image("bot-bottle-agent:latest", "/repo") + calls = [c.args[0] for c in run.call_args_list] + self.assertIn(["container", "builder", "stop"], calls) + self.assertIn( + ["container", "builder", "start", "--dns", "192.168.1.1"], + calls, ) - self.assertTrue(run.call_args.kwargs["check"]) def test_container_exists_parses_quiet_list(self): completed = util.subprocess.CompletedProcess(