diff --git a/README.md b/README.md index 47d81b9..7c40db8 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,27 @@ The container is removed automatically when the session ends. If the script is killed with SIGKILL the exit trap won't fire and the container may be left running; remove it with `docker rm -f <container>`. +### Smolmachines backend (experimental, macOS-only) + +A second backend runs the agent in a smolvm micro-VM (libkrun) with the +sidecar bundle still in Docker. Selected via +`CLAUDE_BOTTLE_BACKEND=smolmachines ./cli.py start <agent>`. Requires +`smolvm` on PATH (`curl -sSL https://smolmachines.com/install.sh | sh`). + +**Known limitation, v1:** smolvm's TSI uses macOS networking, and +Docker Desktop's container IPs aren't reachable from macOS, so the +smolmachines bottle dials the sidecar bundle through host loopback +port-forwards (`127.0.0.1:<port>`). TSI filters by IP only, so the +allowlist is `127.0.0.1/32` — meaning the agent VM can reach **any +service bound to macOS's loopback**, not just the bundle's published +ports. Practical implication: while a smolmachines bottle is running, +host-local dev services (postgres on 5432, dev servers, etc.) are +reachable from inside the agent even if you intended them to be +host-private. The docker backend keeps the bottle on a `--internal` +docker network and doesn't have this issue. A future revision will +narrow this via a per-bottle loopback alias + host-side proxy (see +PRD 0023's "loopback scoping" section). + ## Manifest Bottles and agents live as Markdown files with YAML frontmatter under diff --git a/claude_bottle/backend/__init__.py b/claude_bottle/backend/__init__.py index 42eb3f7..c542052 100644 --- a/claude_bottle/backend/__init__.py +++ b/claude_bottle/backend/__init__.py @@ -119,12 +119,20 @@ class Bottle(ABC): def exec_claude(self, argv: list[str], *, tty: bool = True) -> int: ... 
@abstractmethod - def exec(self, script: str) -> ExecResult: - """Run `script` as a POSIX shell script inside the bottle and - return the captured stdout/stderr/returncode. The bottle's - environment (including HTTPS_PROXY pointing at the pipelock - sidecar) is inherited by the child. Non-zero exit does not - raise — callers inspect `returncode` themselves.""" + def exec(self, script: str, *, user: str = "node") -> ExecResult: + """Run `script` as a POSIX shell script inside the bottle as + `user` (default `node`, matching the agent image's USER + directive) and return the captured stdout/stderr/returncode. + The bottle's environment (including HTTPS_PROXY pointing at + the pipelock sidecar) is inherited by the child. Non-zero + exit does not raise — callers inspect `returncode` + themselves. + + Pass `user="root"` for shell-outs that need privileged file + writes / package install — provisioning calls that need root + bypass `Bottle.exec` and use the backend-specific raw + machine-exec helper, but the tests have a legitimate use + case for arbitrary-user runs.""" @abstractmethod def cp_in(self, host_path: str, container_path: str) -> None: ... diff --git a/claude_bottle/backend/docker/bottle.py b/claude_bottle/backend/docker/bottle.py index 4670748..e0d421c 100644 --- a/claude_bottle/backend/docker/bottle.py +++ b/claude_bottle/backend/docker/bottle.py @@ -51,12 +51,15 @@ class DockerBottle(Bottle): self.claude_docker_argv(argv, tty=tty), check=False, ).returncode - def exec(self, script: str) -> ExecResult: + def exec(self, script: str, *, user: str = "node") -> ExecResult: # Pipe via stdin to `sh -s` so the caller never has to worry # about quoting; the script source lands inside the container - # without crossing argv. + # without crossing argv. `-u <user>` overrides the image's + # default USER — defaults to `node` which is already the + # image's USER, so the explicit flag is a no-op there but + # keeps the cross-backend contract uniform. 
result = subprocess.run( - ["docker", "exec", "-i", self.name, "sh", "-s"], + ["docker", "exec", "-u", user, "-i", self.name, "sh", "-s"], input=script, capture_output=True, text=True, diff --git a/claude_bottle/backend/docker/util.py b/claude_bottle/backend/docker/util.py index 6599da5..0854f5b 100644 --- a/claude_bottle/backend/docker/util.py +++ b/claude_bottle/backend/docker/util.py @@ -166,18 +166,15 @@ def image_id(ref: str) -> str: return r.stdout.strip() -def tag(src: str, dst: str) -> None: - """`docker tag SRC DST`. Idempotent. Used by smolmachines prepare - to retag the locally-built image into a localhost:<port>/... ref - that the ephemeral registry will accept.""" - subprocess.run(["docker", "tag", src, dst], check=True) - - -def push(ref: str) -> None: - """`docker push REF`. Used by smolmachines prepare to push the - agent image into the ephemeral local registry so smolvm's crane - backend can pull it.""" - subprocess.run(["docker", "push", ref], check=True) +def save(ref: str, output: str) -> None: + """`docker save REF -o OUTPUT`. Writes a tarball of the image + layers + manifest to the host path. Used by smolmachines + prepare to hand the agent image to a containerized crane that + pushes it to the ephemeral registry — bypassing the docker + daemon's `docker push` (which on Docker Desktop can't reach a + host-loopback registry and refuses plain-HTTP pushes to + non-loopback hosts).""" + subprocess.run(["docker", "save", ref, "-o", output], check=True) def _silent_run(cmd: Iterable[str]) -> int: diff --git a/claude_bottle/backend/smolmachines/bottle.py b/claude_bottle/backend/smolmachines/bottle.py index efa0aa1..89c836f 100644 --- a/claude_bottle/backend/smolmachines/bottle.py +++ b/claude_bottle/backend/smolmachines/bottle.py @@ -4,63 +4,130 @@ Routes `exec_claude` / `exec` / `cp_in` through `smolvm machine exec` / `smolvm machine cp`. 
The handle is yielded by `launch` and torn down via the surrounding ExitStack on context exit; `close` is a no-op idempotent alias so the BottleBackend ABC's -context-manager contract is satisfied.""" +context-manager contract is satisfied. + +User context: `smolvm machine exec` runs commands as root in the +VM, but the agent image's USER is `node` and claude-code refuses +to run as root with `--dangerously-skip-permissions`. Both +`exec_claude` and `exec` switch to the requested user (default +`node`) via `runuser -u <user> --` and set `HOME` / `USER` +through `smolvm -e` — avoiding `runuser -l`'s login-shell wiring +(PAM session setup, /etc/profile sourcing) which can hang on a +minimal Debian VM with no PAM session config.""" from __future__ import annotations import subprocess -import sys +from typing import Mapping from .. import Bottle, ExecResult from . import smolvm as _smolvm +# Per-user env the agent image's USER (node) expects. claude +# reads ~/.claude.json + writes session state under ~/.claude/; +# bare `runuser -u` inherits root's HOME=/root, which claude +# can't write to. Set HOME / USER explicitly through smolvm -e +# so the child process sees them. +_HOME_FOR = { + "node": "/home/node", + "root": "/root", +} + + +def _env_flags_for(user: str) -> list[str]: + home = _HOME_FOR.get(user, f"/home/{user}") + return ["-e", f"HOME={home}", "-e", f"USER={user}"] + + +def _guest_env_flags(env: Mapping[str, str]) -> list[str]: + """Render `{K: V}` into a flat `-e K=V` argv slice for + `smolvm machine exec`. `smolvm machine create -e` set env + on PID 1 but it doesn't propagate to fresh exec process + trees, so we have to re-pass them every call.""" + out: list[str] = [] + for k, v in env.items(): + out += ["-e", f"{k}={v}"] + return out + + +class SmolmachinesBottle(Bottle): """Handle returned by `SmolmachinesBottleBackend.launch`. 
The underlying VM lifecycle (create / start / stop / delete) lives on the launch ExitStack — this class only routes runtime operations to the right `smolvm machine ...` subcommand.""" - def __init__(self, machine_name: str, *, prompt_path: str | None = None) -> None: + def __init__( + self, + machine_name: str, + *, + prompt_path: str | None = None, + guest_env: Mapping[str, str] | None = None, + ) -> None: self.name = machine_name # In-VM path to the agent's prompt file. None when the # agent declared no prompt (file still exists; we just # don't pass --append-system-prompt-file). self._prompt_path = prompt_path + # Env vars the agent process needs (HTTPS_PROXY, + # CLAUDE_CODE_OAUTH_TOKEN, manifest-declared bottle env, …). + # Forwarded on every `smolvm machine exec` via `-e K=V` + # because exec doesn't inherit from machine_create's env. + self._guest_env = dict(guest_env or {}) def exec_claude(self, argv: list[str], *, tty: bool = True) -> int: - """Run `claude` interactively inside the VM. Inherits the - operator's terminal (stdin / stdout / stderr) so the - session feels native. Blocks until claude exits; returns - the in-VM exit code. + """Run `claude` interactively inside the VM as the `node` + user. Inherits the operator's terminal (stdin / stdout / + stderr) so the session feels native. Blocks until claude + exits; returns the in-VM exit code. We bypass the captured-output `machine_exec` helper here because that one wraps stdout/stderr in pipes — fine for scripted exec, wrong for an interactive shell. Drop down - to `subprocess.run` with the TTY inherited.""" + to `subprocess.run` with the TTY inherited. + + UID switches via `runuser -u node --` (not `-l`) so we + avoid login-shell wiring. 
HOME / USER come from `smolvm + -e` instead, which sets them on the process env.""" flags = ["smolvm", "machine", "exec", "--name", self.name] if tty: flags += ["-i", "-t"] + flags += _env_flags_for("node") + flags += _guest_env_flags(self._guest_env) claude_argv = ["claude"] if self._prompt_path: claude_argv += ["--append-system-prompt-file", self._prompt_path] - flags += ["--", *claude_argv, *argv] + claude_argv += argv + flags += ["--", "runuser", "-u", "node", "--", *claude_argv] result = subprocess.run(flags, check=False) return result.returncode - def exec(self, script: str) -> ExecResult: - """Run a POSIX shell script and capture the result. The - script runs under `/bin/sh -c`, matching what the docker - backend's `exec` does — callers can write shell-y test - helpers without worrying about argv splitting.""" - r = _smolvm.machine_exec( - self.name, - ["/bin/sh", "-c", script], + def exec(self, script: str, *, user: str = "node") -> ExecResult: + """Run a POSIX shell script as `user` (default `node`) and + capture the result. Matches the docker backend's `exec`, + which defaults to the image's USER (also node) — so test + helpers / provision shell-outs run with the same identity + on both backends. Pass `user="root"` for tests that need + root. + + `runuser -u -- /bin/sh -c