diff --git a/claude_bottle/backend/docker/backend.py b/claude_bottle/backend/docker/backend.py index 3f97db3..affd39f 100644 --- a/claude_bottle/backend/docker/backend.py +++ b/claude_bottle/backend/docker/backend.py @@ -32,9 +32,12 @@ from .bottle_cleanup_plan import DockerBottleCleanupPlan from .bottle_plan import DockerBottlePlan from .pipelock import ( DockerPipelockProxy, - pipelock_proxy_host_port, pipelock_proxy_url, ) +from .provision import git as _git +from .provision import prompt as _prompt +from .provision import skills as _skills +from .provision import ssh as _ssh # Where the repo root lives, for `docker build` context. Computed once. @@ -282,35 +285,8 @@ class DockerBottleBackend(BottleBackend): info(f"name conflict; retrying as {container}") def provision_prompt(self, plan: BottlePlan, target: str) -> str | None: - """Copy the prompt file into the container, fix ownership/mode. - Returns the in-container path if the agent has a non-empty - prompt (drives --append-system-prompt-file), else None. The - file is copied either way so the path always exists.""" assert isinstance(plan, DockerBottlePlan) - container = target - container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") - in_container_prompt_path = f"{container_home}/.claude-bottle-prompt.txt" - - subprocess.run( - ["docker", "cp", str(plan.prompt_file), f"{container}:{in_container_prompt_path}"], - stdout=subprocess.DEVNULL, - check=True, - ) - # `docker cp` preserves host UID; re-own/mode as root so node - # can read its own mode-600 prompt regardless of host UID. 
- subprocess.run( - ["docker", "exec", "-u", "0", container, "chown", "node:node", in_container_prompt_path], - stdout=subprocess.DEVNULL, - check=True, - ) - subprocess.run( - ["docker", "exec", "-u", "0", container, "chmod", "600", in_container_prompt_path], - stdout=subprocess.DEVNULL, - check=True, - ) - - agent = plan.spec.manifest.agents[plan.spec.agent_name] - return in_container_prompt_path if agent.prompt else None + return _prompt.provision_prompt(plan, target) def validate_skills(self, skills: list[str]) -> None: """Fail loudly if any named skill is missing from the host's @@ -326,50 +302,8 @@ class DockerBottleBackend(BottleBackend): ) def provision_skills(self, plan: BottlePlan, target: str) -> None: - """Copy each of the agent's named skills from the host's - ~/.claude/skills// into the container's equivalent path. - For each skill: ensure parent dir, wipe any prior copy, then - `docker cp /. :/` so the contents are - copied into a freshly-created destination dir. No-op when the - agent has no skills.""" assert isinstance(plan, DockerBottlePlan) - agent = plan.spec.manifest.agents[plan.spec.agent_name] - if not agent.skills: - return - - container = target - container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") - skills_dir = os.environ.get( - "CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR", f"{container_home}/.claude/skills" - ) - - subprocess.run( - ["docker", "exec", container, "mkdir", "-p", skills_dir], - stdout=subprocess.DEVNULL, - check=True, - ) - - for n in agent.skills: - src = host_skill_dir(n) - if not os.path.isdir(src): - die(f"skill '{n}' disappeared from host between validation and copy at {src}.") - dst = f"{skills_dir}/{n}" - info(f"copying skill {n} into {container}:{dst}") - subprocess.run( - ["docker", "exec", container, "rm", "-rf", dst], - stdout=subprocess.DEVNULL, - check=True, - ) - subprocess.run( - ["docker", "exec", container, "mkdir", "-p", dst], - stdout=subprocess.DEVNULL, - check=True, - ) - 
subprocess.run( - ["docker", "cp", f"{src}/.", f"{container}:{dst}/"], - stdout=subprocess.DEVNULL, - check=True, - ) + _skills.provision_skills(plan, target) def validate_ssh_entries(self, entries: Sequence[SshEntry]) -> None: """Each entry's IdentityFile must exist on the host (after @@ -383,196 +317,12 @@ class DockerBottleBackend(BottleBackend): die(f"ssh key file not found for host '{entry.Host}': {key}") def provision_ssh(self, plan: BottlePlan, target: str) -> None: - """Set up SSH in the container so node can authenticate using - each entry's key without the key file being readable by node. - No-op when the bottle has no SSH entries. - - Isolation strategy: - - Keys live at /root/.claude-bottle-keys/ (mode 700, - root-owned). /root is mode 700 in node:22-slim, so node - (uid 1000) can't even traverse in. - - ssh-agent runs as root, listening on - /run/claude-bottle-agent.sock. Each key is loaded with - ssh-add, then deleted; the bytes now live only in the - agent process's memory. - - ssh-agent's SO_PEERCRED-based UID match rejects every - connection whose peer euid is neither 0 nor the agent's. - To bridge that, a root-owned socat forwarder listens on - /run/claude-bottle-agent-public.sock (mode 666) and - proxies bytes to the real agent socket. - - node can't ptrace root-owned agent or socat, so - /proc//mem is off-limits and key bytes never leave - root-owned memory. - - ~/.ssh/config in node's home points each Host at the - public socket via IdentityAgent. - - Why an in-container agent (not bind-mounted from host): - Docker Desktop on macOS does not forward Unix-domain socket - connect() across the VM boundary — connect() returns - ENOTSUP. Running ssh-agent inside the container sidesteps - that entirely. - - Limitation: keys must be passphrase-less. 
ssh-add prompts on - /dev/tty for passphrases, but our docker exec has no TTY.""" assert isinstance(plan, DockerBottlePlan) - bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name) - if not bottle.ssh: - return - - container = target - proxy_host_port = pipelock_proxy_host_port(plan.slug) - container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") - container_ssh = f"{container_home}/.ssh" - agent_socket = "/run/claude-bottle-agent.sock" - public_socket = "/run/claude-bottle-agent-public.sock" - keys_dir = "/root/.claude-bottle-keys" - - # ~/.ssh for node (700, owned by node). - docker_mod.docker_exec_root(container, ["mkdir", "-p", container_ssh]) - docker_mod.docker_exec_root(container, ["chown", "node:node", container_ssh]) - docker_mod.docker_exec_root(container, ["chmod", "700", container_ssh]) - - # /root/.claude-bottle-keys for root (700, root-owned). - docker_mod.docker_exec_root(container, ["mkdir", "-p", keys_dir]) - docker_mod.docker_exec_root(container, ["chown", "root:root", keys_dir]) - docker_mod.docker_exec_root(container, ["chmod", "700", keys_dir]) - - config_file = plan.stage_dir / "ssh_config" - known_hosts_file = plan.stage_dir / "ssh_known_hosts" - config_file.write_text("") - config_file.chmod(0o600) - known_hosts_file.write_text("") - known_hosts_file.chmod(0o600) - - proxy_host, _, proxy_port = proxy_host_port.partition(":") - - container_key_paths: list[str] = [] - for entry in bottle.ssh: - name = entry.Host - key = expand_tilde(entry.IdentityFile) - hostname = entry.Hostname - user = entry.User - port = entry.Port - known_host_key = entry.KnownHostKey - - key_basename = os.path.basename(key) - container_key_path = f"{keys_dir}/{key_basename}" - - info(f"copying ssh key for '{name}' -> {container} (root-only staging)") - subprocess.run( - ["docker", "cp", key, f"{container}:{container_key_path}"], - stdout=subprocess.DEVNULL, - check=True, - ) - docker_mod.docker_exec_root(container, ["chown", "root:root", 
container_key_path]) - docker_mod.docker_exec_root(container, ["chmod", "600", container_key_path]) - - container_key_paths.append(container_key_path) - - # ProxyCommand tunnels SSH through pipelock via HTTP - # CONNECT. %h / %p expand to this block's HostName / - # Port. socat's PROXY: mode does CONNECT host:port to - # the proxy. - block = ( - f"Host {name}\n" - f" HostName {hostname}\n" - f" User {user}\n" - f" Port {port}\n" - f" IdentityAgent {public_socket}\n" - f" ProxyCommand socat - PROXY:{proxy_host}:%h:%p,proxyport={proxy_port}\n" - f"\n" - ) - with config_file.open("a") as f: - f.write(block) - - if known_host_key: - entries_to_write: list[str] = [] - if port == "22": - entries_to_write.append(f"{name} {known_host_key}\n") - if hostname != name: - entries_to_write.append(f"{hostname} {known_host_key}\n") - else: - entries_to_write.append(f"[{name}]:{port} {known_host_key}\n") - if hostname != name: - entries_to_write.append(f"[{hostname}]:{port} {known_host_key}\n") - with known_hosts_file.open("a") as f: - for e in entries_to_write: - f.write(e) - - # Boot the agent, load each key, delete the key files, then - # start the root-owned socat forwarder. One docker exec so the - # whole sequence is atomic. - info(f"starting in-container ssh-agent at {agent_socket} (forwarded via {public_socket})") - setup_lines = [ - "set -eu", - f"ssh-agent -a {agent_socket} >/dev/null", - ] - for kp in container_key_paths: - setup_lines.append(f"SSH_AUTH_SOCK={agent_socket} ssh-add {kp}") - setup_lines.append(f"rm -f {kp}") - setup_lines.append(f"rmdir {keys_dir} 2>/dev/null || true") - # Forwarder: socat (uid 0) connects to the agent on node's behalf. - setup_lines.append( - f"nohup socat UNIX-LISTEN:{public_socket},fork,reuseaddr,mode=666 " - f"UNIX-CONNECT:{agent_socket} /dev/null 2>&1 &" - ) - # Wait briefly for the forwarder to bind. 
- setup_lines.extend([ - "i=0", - "while [ $i -lt 20 ]; do", - f" [ -S {public_socket} ] && break", - " i=$((i + 1))", - " sleep 0.1", - "done", - f"[ -S {public_socket} ] || {{ echo 'claude-bottle: socat forwarder failed to bind {public_socket}' >&2; exit 1; }}", - ]) - setup_script = "\n".join(setup_lines) + "\n" - subprocess.run( - ["docker", "exec", "-u", "0", container, "sh", "-c", setup_script], - check=True, - ) - - info(f"writing {container_ssh}/config") - subprocess.run( - ["docker", "cp", str(config_file), f"{container}:{container_ssh}/config"], - stdout=subprocess.DEVNULL, - check=True, - ) - docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/config"]) - docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/config"]) - - if known_hosts_file.stat().st_size > 0: - info(f"writing {container_ssh}/known_hosts") - subprocess.run( - ["docker", "cp", str(known_hosts_file), f"{container}:{container_ssh}/known_hosts"], - stdout=subprocess.DEVNULL, - check=True, - ) - docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/known_hosts"]) - docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/known_hosts"]) + _ssh.provision_ssh(plan, target) def provision_git(self, plan: BottlePlan, target: str) -> None: - """If --cwd was set and the host cwd has a .git directory, copy - it into /home/node/workspace/.git and fix ownership. 
No-op - otherwise.""" assert isinstance(plan, DockerBottlePlan) - if not (plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir()): - return - container = target - info(f"copying {plan.spec.user_cwd}/.git -> {container}:/home/node/workspace/.git") - subprocess.run( - ["docker", "cp", f"{plan.spec.user_cwd}/.git", f"{container}:/home/node/workspace/.git"], - stdout=subprocess.DEVNULL, - check=True, - ) - subprocess.run( - [ - "docker", "exec", "-u", "0", container, - "chown", "-R", "node:node", "/home/node/workspace/.git", - ], - stdout=subprocess.DEVNULL, - check=True, - ) + _git.provision_git(plan, target) # --- Cleanup --- diff --git a/claude_bottle/backend/docker/provision/__init__.py b/claude_bottle/backend/docker/provision/__init__.py new file mode 100644 index 0000000..2f66425 --- /dev/null +++ b/claude_bottle/backend/docker/provision/__init__.py @@ -0,0 +1,8 @@ +"""Per-provisioner modules for the Docker backend. + +Each module exports one top-level function: + provision_<name>(plan: DockerBottlePlan, target: str) -> ... + +`DockerBottleBackend.provision_*` methods delegate to these. The +abstract `BottleBackend.provision_*` surface is unchanged; this +subpackage exists only to keep `backend.py` from being a god-file.""" diff --git a/claude_bottle/backend/docker/provision/git.py b/claude_bottle/backend/docker/provision/git.py new file mode 100644 index 0000000..3007fac --- /dev/null +++ b/claude_bottle/backend/docker/provision/git.py @@ -0,0 +1,36 @@ +"""Copy the host cwd's .git directory into a running Docker bottle. + +Only fires when `--cwd` was passed AND the host cwd actually has a +.git.
The container-side path is fixed at /home/node/workspace/.git; +ownership is reset to node so the agent can run git commands.""" + +from __future__ import annotations + +import subprocess +from pathlib import Path + +from ....log import info +from ..bottle_plan import DockerBottlePlan + + +def provision_git(plan: DockerBottlePlan, target: str) -> None: + """If --cwd was set and the host cwd has a .git directory, copy + it into /home/node/workspace/.git and fix ownership. No-op + otherwise.""" + if not (plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir()): + return + container = target + info(f"copying {plan.spec.user_cwd}/.git -> {container}:/home/node/workspace/.git") + subprocess.run( + ["docker", "cp", f"{plan.spec.user_cwd}/.git", f"{container}:/home/node/workspace/.git"], + stdout=subprocess.DEVNULL, + check=True, + ) + subprocess.run( + [ + "docker", "exec", "-u", "0", container, + "chown", "-R", "node:node", "/home/node/workspace/.git", + ], + stdout=subprocess.DEVNULL, + check=True, + ) diff --git a/claude_bottle/backend/docker/provision/prompt.py b/claude_bottle/backend/docker/provision/prompt.py new file mode 100644 index 0000000..29df62c --- /dev/null +++ b/claude_bottle/backend/docker/provision/prompt.py @@ -0,0 +1,43 @@ +"""Copy the agent prompt into a running Docker bottle. + +The prompt file is always copied (so the in-container path always +exists) but `--append-system-prompt-file` only fires when the agent +actually has a prompt — the return value signals which case.""" + +from __future__ import annotations + +import os +import subprocess + +from ..bottle_plan import DockerBottlePlan + + +def provision_prompt(plan: DockerBottlePlan, target: str) -> str | None: + """Copy the prompt file into the container, fix ownership/mode. + Returns the in-container path if the agent has a non-empty + prompt (drives --append-system-prompt-file), else None. 
The + file is copied either way so the path always exists.""" + container = target + container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") + in_container_prompt_path = f"{container_home}/.claude-bottle-prompt.txt" + + subprocess.run( + ["docker", "cp", str(plan.prompt_file), f"{container}:{in_container_prompt_path}"], + stdout=subprocess.DEVNULL, + check=True, + ) + # `docker cp` preserves host UID; re-own/mode as root so node + # can read its own mode-600 prompt regardless of host UID. + subprocess.run( + ["docker", "exec", "-u", "0", container, "chown", "node:node", in_container_prompt_path], + stdout=subprocess.DEVNULL, + check=True, + ) + subprocess.run( + ["docker", "exec", "-u", "0", container, "chmod", "600", in_container_prompt_path], + stdout=subprocess.DEVNULL, + check=True, + ) + + agent = plan.spec.manifest.agents[plan.spec.agent_name] + return in_container_prompt_path if agent.prompt else None diff --git a/claude_bottle/backend/docker/provision/skills.py b/claude_bottle/backend/docker/provision/skills.py new file mode 100644 index 0000000..410b76e --- /dev/null +++ b/claude_bottle/backend/docker/provision/skills.py @@ -0,0 +1,62 @@ +"""Copy host-side skill directories into a running Docker bottle. + +Skills are validated on the host before launch by +`DockerBottleBackend.validate_skills`; this module assumes that +validation has already run. A skill disappearing between validation +and copy still dies loudly rather than silently producing a partial +container.""" + +from __future__ import annotations + +import os +import subprocess + +from ....log import die, info +from ...util import host_skill_dir +from ..bottle_plan import DockerBottlePlan + + +def provision_skills(plan: DockerBottlePlan, target: str) -> None: + """Copy each of the agent's named skills from the host's + ~/.claude/skills// into the container's equivalent path. + For each skill: ensure parent dir, wipe any prior copy, then + `docker cp /. 
:/` so the contents are + copied into a freshly-created destination dir. No-op when the + agent has no skills.""" + agent = plan.spec.manifest.agents[plan.spec.agent_name] + if not agent.skills: + return + + container = target + container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") + skills_dir = os.environ.get( + "CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR", f"{container_home}/.claude/skills" + ) + + subprocess.run( + ["docker", "exec", container, "mkdir", "-p", skills_dir], + stdout=subprocess.DEVNULL, + check=True, + ) + + for n in agent.skills: + src = host_skill_dir(n) + if not os.path.isdir(src): + die(f"skill '{n}' disappeared from host between validation and copy at {src}.") + dst = f"{skills_dir}/{n}" + info(f"copying skill {n} into {container}:{dst}") + subprocess.run( + ["docker", "exec", container, "rm", "-rf", dst], + stdout=subprocess.DEVNULL, + check=True, + ) + subprocess.run( + ["docker", "exec", container, "mkdir", "-p", dst], + stdout=subprocess.DEVNULL, + check=True, + ) + subprocess.run( + ["docker", "cp", f"{src}/.", f"{container}:{dst}/"], + stdout=subprocess.DEVNULL, + check=True, + ) diff --git a/claude_bottle/backend/docker/provision/ssh.py b/claude_bottle/backend/docker/provision/ssh.py new file mode 100644 index 0000000..6db6717 --- /dev/null +++ b/claude_bottle/backend/docker/provision/ssh.py @@ -0,0 +1,193 @@ +"""Set up SSH inside a running Docker bottle. + +This is the most involved provisioner. 
The end state in the container: + - ~/.ssh/config + ~/.ssh/known_hosts owned by node, mode 600 + - ssh-agent running as root with each key loaded; agent socket at + /run/claude-bottle-agent.sock + - socat forwarder (also root) bridging the agent socket to + /run/claude-bottle-agent-public.sock (mode 666) so node can talk + to the agent despite ssh-agent's SO_PEERCRED UID match + - on-disk key files deleted after `ssh-add`; the bytes only live in + the agent process's memory thereafter + +See the `provision_ssh` docstring for the full isolation rationale.""" + +from __future__ import annotations + +import os +import subprocess + +from ....log import info +from ....util import expand_tilde +from .. import util as docker_mod +from ..bottle_plan import DockerBottlePlan +from ..pipelock import pipelock_proxy_host_port + + +def provision_ssh(plan: DockerBottlePlan, target: str) -> None: + """Set up SSH in the container so node can authenticate using + each entry's key without the key file being readable by node. + No-op when the bottle has no SSH entries. + + Isolation strategy: + - Keys live at /root/.claude-bottle-keys/ (mode 700, + root-owned). /root is mode 700 in node:22-slim, so node + (uid 1000) can't even traverse in. + - ssh-agent runs as root, listening on + /run/claude-bottle-agent.sock. Each key is loaded with + ssh-add, then deleted; the bytes now live only in the + agent process's memory. + - ssh-agent's SO_PEERCRED-based UID match rejects every + connection whose peer euid is neither 0 nor the agent's. + To bridge that, a root-owned socat forwarder listens on + /run/claude-bottle-agent-public.sock (mode 666) and + proxies bytes to the real agent socket. + - node can't ptrace root-owned agent or socat, so + /proc//mem is off-limits and key bytes never leave + root-owned memory. + - ~/.ssh/config in node's home points each Host at the + public socket via IdentityAgent. 
+ + Why an in-container agent (not bind-mounted from host): + Docker Desktop on macOS does not forward Unix-domain socket + connect() across the VM boundary — connect() returns + ENOTSUP. Running ssh-agent inside the container sidesteps + that entirely. + + Limitation: keys must be passphrase-less. ssh-add prompts on + /dev/tty for passphrases, but our docker exec has no TTY.""" + bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name) + if not bottle.ssh: + return + + container = target + proxy_host_port = pipelock_proxy_host_port(plan.slug) + container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node") + container_ssh = f"{container_home}/.ssh" + agent_socket = "/run/claude-bottle-agent.sock" + public_socket = "/run/claude-bottle-agent-public.sock" + keys_dir = "/root/.claude-bottle-keys" + + # ~/.ssh for node (700, owned by node). + docker_mod.docker_exec_root(container, ["mkdir", "-p", container_ssh]) + docker_mod.docker_exec_root(container, ["chown", "node:node", container_ssh]) + docker_mod.docker_exec_root(container, ["chmod", "700", container_ssh]) + + # /root/.claude-bottle-keys for root (700, root-owned). 
+ docker_mod.docker_exec_root(container, ["mkdir", "-p", keys_dir]) + docker_mod.docker_exec_root(container, ["chown", "root:root", keys_dir]) + docker_mod.docker_exec_root(container, ["chmod", "700", keys_dir]) + + config_file = plan.stage_dir / "ssh_config" + known_hosts_file = plan.stage_dir / "ssh_known_hosts" + config_file.write_text("") + config_file.chmod(0o600) + known_hosts_file.write_text("") + known_hosts_file.chmod(0o600) + + proxy_host, _, proxy_port = proxy_host_port.partition(":") + + container_key_paths: list[str] = [] + for entry in bottle.ssh: + name = entry.Host + key = expand_tilde(entry.IdentityFile) + hostname = entry.Hostname + user = entry.User + port = entry.Port + known_host_key = entry.KnownHostKey + + key_basename = os.path.basename(key) + container_key_path = f"{keys_dir}/{key_basename}" + + info(f"copying ssh key for '{name}' -> {container} (root-only staging)") + subprocess.run( + ["docker", "cp", key, f"{container}:{container_key_path}"], + stdout=subprocess.DEVNULL, + check=True, + ) + docker_mod.docker_exec_root(container, ["chown", "root:root", container_key_path]) + docker_mod.docker_exec_root(container, ["chmod", "600", container_key_path]) + + container_key_paths.append(container_key_path) + + # ProxyCommand tunnels SSH through pipelock via HTTP + # CONNECT. %h / %p expand to this block's HostName / + # Port. socat's PROXY: mode does CONNECT host:port to + # the proxy. 
+ block = ( + f"Host {name}\n" + f" HostName {hostname}\n" + f" User {user}\n" + f" Port {port}\n" + f" IdentityAgent {public_socket}\n" + f" ProxyCommand socat - PROXY:{proxy_host}:%h:%p,proxyport={proxy_port}\n" + f"\n" + ) + with config_file.open("a") as f: + f.write(block) + + if known_host_key: + entries_to_write: list[str] = [] + if port == "22": + entries_to_write.append(f"{name} {known_host_key}\n") + if hostname != name: + entries_to_write.append(f"{hostname} {known_host_key}\n") + else: + entries_to_write.append(f"[{name}]:{port} {known_host_key}\n") + if hostname != name: + entries_to_write.append(f"[{hostname}]:{port} {known_host_key}\n") + with known_hosts_file.open("a") as f: + for e in entries_to_write: + f.write(e) + + # Boot the agent, load each key, delete the key files, then + # start the root-owned socat forwarder. One docker exec so the + # whole sequence is atomic. + info(f"starting in-container ssh-agent at {agent_socket} (forwarded via {public_socket})") + setup_lines = [ + "set -eu", + f"ssh-agent -a {agent_socket} >/dev/null", + ] + for kp in container_key_paths: + setup_lines.append(f"SSH_AUTH_SOCK={agent_socket} ssh-add {kp}") + setup_lines.append(f"rm -f {kp}") + setup_lines.append(f"rmdir {keys_dir} 2>/dev/null || true") + # Forwarder: socat (uid 0) connects to the agent on node's behalf. + setup_lines.append( + f"nohup socat UNIX-LISTEN:{public_socket},fork,reuseaddr,mode=666 " + f"UNIX-CONNECT:{agent_socket} /dev/null 2>&1 &" + ) + # Wait briefly for the forwarder to bind. 
+ setup_lines.extend([ + "i=0", + "while [ $i -lt 20 ]; do", + f" [ -S {public_socket} ] && break", + " i=$((i + 1))", + " sleep 0.1", + "done", + f"[ -S {public_socket} ] || {{ echo 'claude-bottle: socat forwarder failed to bind {public_socket}' >&2; exit 1; }}", + ]) + setup_script = "\n".join(setup_lines) + "\n" + subprocess.run( + ["docker", "exec", "-u", "0", container, "sh", "-c", setup_script], + check=True, + ) + + info(f"writing {container_ssh}/config") + subprocess.run( + ["docker", "cp", str(config_file), f"{container}:{container_ssh}/config"], + stdout=subprocess.DEVNULL, + check=True, + ) + docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/config"]) + docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/config"]) + + if known_hosts_file.stat().st_size > 0: + info(f"writing {container_ssh}/known_hosts") + subprocess.run( + ["docker", "cp", str(known_hosts_file), f"{container}:{container_ssh}/known_hosts"], + stdout=subprocess.DEVNULL, + check=True, + ) + docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/known_hosts"]) + docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/known_hosts"]) diff --git a/docs/prds/0004-split-out-provisioners.md b/docs/prds/0004-split-out-provisioners.md new file mode 100644 index 0000000..38bf4f5 --- /dev/null +++ b/docs/prds/0004-split-out-provisioners.md @@ -0,0 +1,226 @@ +# PRD 0004: Split out provisioners + +- **Status:** Draft +- **Author:** didericis +- **Created:** 2026-05-11 + +## Summary + +Break `claude_bottle/backend/docker/backend.py` (664 lines) apart by +moving the four provisioner methods — `provision_prompt`, +`provision_skills`, `provision_ssh`, `provision_git` — out of +`DockerBottleBackend` into their own modules under +`claude_bottle/backend/docker/provision/`. The abstract base in +`claude_bottle/backend/__init__.py` keeps the same four-method +contract; only the Docker implementation changes shape. 
+ +## Problem + +`DockerBottleBackend` is doing too much in one file. After PRD 0003 +landed, the class owns: + +- `prepare` — name resolution, validation, scratch file writes +- `launch` — image build, network creation, sidecar lifecycle, + `docker run`, teardown +- `_run_agent_container` — argv assembly + name-conflict retry +- `provision_prompt` / `provision_skills` / `provision_ssh` / + `provision_git` — four host→container copy paths +- `prepare_cleanup` / `cleanup` / `list_active` — orphan handling + +The provisioners are the largest single chunk. `provision_ssh` alone +is ~150 lines because it sets up a root-staged keyring, an in-container +`ssh-agent`, and a `socat` forwarder so node (uid 1000) can talk to a +root-owned agent socket without ptrace access. That logic is +self-contained — it touches the container via `docker exec` and +`docker cp` and reads from `BottlePlan` — but it sits in the same file +as image build and cleanup, which makes the file hard to scan and +invites unrelated changes to land in the same diff. + +The provisioners are also the most likely place for new backends to +diverge. A future fly.io backend would not run `ssh-agent` inside +the container this way; an Apple `container` backend might. Pulling each +provisioner into its own module makes the per-backend variation a +file boundary, not a method boundary inside a god-class. + +## Goals / Success Criteria + +The feature works when all of the following are observable: + +- `cli.py start` produces a byte-identical container topology, env, + skills layout, SSH config, and `.git` copy as before the split. No + user-visible behavior change. +- `DockerBottleBackend` in `backend.py` is under ~350 lines, with the + four provisioner methods reduced to thin dispatchers that delegate + to the per-provisioner modules. +- The full test suite passes unchanged (unit + integration + canary).
+ +The feature is **done** when all of the following ship: + +- A new `claude_bottle/backend/docker/provision/` subpackage exists + with one module per provisioner: `prompt.py`, `skills.py`, `ssh.py`, + `git.py`. Each exports a single top-level function taking + `(plan: DockerBottlePlan, target: str)` and returning the same type + the current method returns (`str | None` for prompt, `None` for the + others). +- `DockerBottleBackend.provision_prompt` / `provision_skills` / + `provision_ssh` / `provision_git` each become one-line delegations + to the new module functions. +- The abstract `BottleBackend.provision_*` signatures in + `claude_bottle/backend/__init__.py` are unchanged. The + `BottleBackend.provision` orchestration in the base class is + unchanged. +- No top-level CLI code or other backend gains a direct import of the + provisioner modules — the only call site is + `DockerBottleBackend.provision_*`. + +## Non-goals + +- No change to *what* the provisioners do. The SSH provisioning's + root-keyring + ssh-agent + socat-bridge design stays exactly as it + is. The skills `docker cp <src>/. <dst>/` pattern stays. The + `.git` copy stays gated on `spec.copy_cwd` + cwd having a `.git`. +- No replacement of `launch`'s ad-hoc `state: dict[str, str]` + teardown with `contextlib.ExitStack`. That cleanup is worthwhile + but is a separate change. +- No deduplication of the two name-conflict retry loops (one in + `prepare`, one in `_run_agent_container`). +- No removal of the `os.environ["CLAUDE_CODE_OAUTH_TOKEN"]` mutation + in `_run_agent_container`. That's a parent-process side effect + worth fixing, but it's outside the provisioner split. +- No new abstract base class for provisioners (no `Provisioner` ABC). + The four functions stay module-level; the abstract surface is the + four methods on `BottleBackend`. Introducing a `Provisioner` type + would be premature with one backend. +- No change to the `BottleBackend.provision_*` method names or + signatures.
Callers continue to invoke them on the backend + instance. + +## Scope + +### In scope + +- New `claude_bottle/backend/docker/provision/` subpackage with + `__init__.py`, `prompt.py`, `skills.py`, `ssh.py`, `git.py`. +- Moving the four method bodies out of + `DockerBottleBackend` into the new modules verbatim, adjusting only + what's needed to make them free functions: the unused `self` + parameter is dropped and all inputs arrive via `plan`; private helpers move with their primary + caller; imports of `docker_mod`, `network_mod`, `pipelock`, + `expand_tilde`, etc. follow them. +- Reducing the `provision_*` methods on `DockerBottleBackend` to + one-line delegations. +- Updating any tests that monkeypatch + `DockerBottleBackend.provision_*` to monkeypatch the new module + functions instead (if any do — most existing tests don't reach + into provisioning). + +### Out of scope + +- `prepare`, `launch`, `_run_agent_container`, `prepare_cleanup`, + `cleanup`, `list_active`. These stay in `backend.py`. +- The `validate_skills` and `validate_ssh_entries` host-side + validation methods. They run from `prepare` (before the y/N), not + from `provision`, so they belong with `prepare` and stay on the + class. +- Any change to the abstract `BottleBackend` base in + `backend/__init__.py`. +- Cross-backend reuse of provisioner code. There's no second backend + yet; designing for one before it exists would be premature.
+ +## Proposed Design + +### New layout + +``` +claude_bottle/backend/docker/ + backend.py # DockerBottleBackend (slimmer) + bottle.py + bottle_plan.py + bottle_cleanup_plan.py + network.py + pipelock.py + util.py + provision/ + __init__.py # docstring only; explicit imports per module + prompt.py # provision_prompt(plan, target) -> str | None + skills.py # provision_skills(plan, target) -> None + ssh.py # provision_ssh(plan, target) -> None + git.py # provision_git(plan, target) -> None +``` + +### Function signatures + +Each module exports one top-level function with the same shape: + +```python +# prompt.py +def provision_prompt(plan: DockerBottlePlan, target: str) -> str | None: ... + +# skills.py +def provision_skills(plan: DockerBottlePlan, target: str) -> None: ... + +# ssh.py +def provision_ssh(plan: DockerBottlePlan, target: str) -> None: ... + +# git.py +def provision_git(plan: DockerBottlePlan, target: str) -> None: ... +``` + +`target` is the resolved container name (same value the current +methods receive). The functions are free functions, not methods, so +they don't accept `self`. + +### Delegation on the backend + +`DockerBottleBackend.provision_*` shrinks to: + +```python +from .provision import prompt as _prompt +from .provision import skills as _skills +from .provision import ssh as _ssh +from .provision import git as _git + +class DockerBottleBackend(BottleBackend): + ... + def provision_prompt(self, plan: BottlePlan, target: str) -> str | None: + assert isinstance(plan, DockerBottlePlan) + return _prompt.provision_prompt(plan, target) + + def provision_skills(self, plan: BottlePlan, target: str) -> None: + assert isinstance(plan, DockerBottlePlan) + _skills.provision_skills(plan, target) + + # ...same for ssh, git +``` + +The `isinstance` assert stays on the method (the abstract base passes +`BottlePlan`, not `DockerBottlePlan`) so the module functions can +take the concrete type and skip re-checking.
+ +### Existing code touched + +- **`claude_bottle/backend/docker/backend.py`** — four method + bodies move out; method definitions stay as one-line delegations. + Imports for `pipelock_proxy_host_port`, `expand_tilde`, etc., that + are only used by the moved bodies migrate with them. +- **`claude_bottle/backend/docker/__init__.py`** — no change. The + public surface (`DockerBottleBackend`) is unchanged. +- **`claude_bottle/backend/__init__.py`** — no change. +- **`tests/`** — no expected change. Existing tests exercise the + backend via `DockerBottleBackend` or the CLI surface; they don't + reach into provisioners directly. Verify after the move and only + update if a test breaks. + +### Data model changes + +None. + +### External dependencies + +None new. + +## References + +- PRD 0003 (`docs/prds/0003-bottle-backend-abstraction.md`) — + establishes the four-method provisioner contract being preserved + here.