PRD 0004: Split out provisioners #7

Merged
didericis merged 5 commits from split-out-provisioners into main 2026-05-11 19:47:27 -04:00
7 changed files with 576 additions and 258 deletions
+8 -258
View File
@@ -32,9 +32,12 @@ from .bottle_cleanup_plan import DockerBottleCleanupPlan
from .bottle_plan import DockerBottlePlan
from .pipelock import (
DockerPipelockProxy,
pipelock_proxy_host_port,
pipelock_proxy_url,
)
from .provision import git as _git
from .provision import prompt as _prompt
from .provision import skills as _skills
from .provision import ssh as _ssh
# Where the repo root lives, for `docker build` context. Computed once.
@@ -282,35 +285,8 @@ class DockerBottleBackend(BottleBackend):
info(f"name conflict; retrying as {container}")
def provision_prompt(self, plan: BottlePlan, target: str) -> str | None:
"""Copy the prompt file into the container, fix ownership/mode.
Returns the in-container path if the agent has a non-empty
prompt (drives --append-system-prompt-file), else None. The
file is copied either way so the path always exists."""
assert isinstance(plan, DockerBottlePlan)
container = target
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
in_container_prompt_path = f"{container_home}/.claude-bottle-prompt.txt"
subprocess.run(
["docker", "cp", str(plan.prompt_file), f"{container}:{in_container_prompt_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
# `docker cp` preserves host UID; re-own/mode as root so node
# can read its own mode-600 prompt regardless of host UID.
subprocess.run(
["docker", "exec", "-u", "0", container, "chown", "node:node", in_container_prompt_path],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", container, "chmod", "600", in_container_prompt_path],
stdout=subprocess.DEVNULL,
check=True,
)
agent = plan.spec.manifest.agents[plan.spec.agent_name]
return in_container_prompt_path if agent.prompt else None
return _prompt.provision_prompt(plan, target)
def validate_skills(self, skills: list[str]) -> None:
"""Fail loudly if any named skill is missing from the host's
@@ -326,50 +302,8 @@ class DockerBottleBackend(BottleBackend):
)
def provision_skills(self, plan: BottlePlan, target: str) -> None:
"""Copy each of the agent's named skills from the host's
~/.claude/skills/<name>/ into the container's equivalent path.
For each skill: ensure parent dir, wipe any prior copy, then
`docker cp <host>/. <container>:<dst>/` so the contents are
copied into a freshly-created destination dir. No-op when the
agent has no skills."""
assert isinstance(plan, DockerBottlePlan)
agent = plan.spec.manifest.agents[plan.spec.agent_name]
if not agent.skills:
return
container = target
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
skills_dir = os.environ.get(
"CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR", f"{container_home}/.claude/skills"
)
subprocess.run(
["docker", "exec", container, "mkdir", "-p", skills_dir],
stdout=subprocess.DEVNULL,
check=True,
)
for n in agent.skills:
src = host_skill_dir(n)
if not os.path.isdir(src):
die(f"skill '{n}' disappeared from host between validation and copy at {src}.")
dst = f"{skills_dir}/{n}"
info(f"copying skill {n} into {container}:{dst}")
subprocess.run(
["docker", "exec", container, "rm", "-rf", dst],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", container, "mkdir", "-p", dst],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "cp", f"{src}/.", f"{container}:{dst}/"],
stdout=subprocess.DEVNULL,
check=True,
)
_skills.provision_skills(plan, target)
def validate_ssh_entries(self, entries: Sequence[SshEntry]) -> None:
"""Each entry's IdentityFile must exist on the host (after
@@ -383,196 +317,12 @@ class DockerBottleBackend(BottleBackend):
die(f"ssh key file not found for host '{entry.Host}': {key}")
def provision_ssh(self, plan: BottlePlan, target: str) -> None:
"""Set up SSH in the container so node can authenticate using
each entry's key without the key file being readable by node.
No-op when the bottle has no SSH entries.
Isolation strategy:
- Keys live at /root/.claude-bottle-keys/ (mode 700,
root-owned). /root is mode 700 in node:22-slim, so node
(uid 1000) can't even traverse in.
- ssh-agent runs as root, listening on
/run/claude-bottle-agent.sock. Each key is loaded with
ssh-add, then deleted; the bytes now live only in the
agent process's memory.
- ssh-agent's SO_PEERCRED-based UID match rejects every
connection whose peer euid is neither 0 nor the agent's.
To bridge that, a root-owned socat forwarder listens on
/run/claude-bottle-agent-public.sock (mode 666) and
proxies bytes to the real agent socket.
- node can't ptrace root-owned agent or socat, so
/proc/<pid>/mem is off-limits and key bytes never leave
root-owned memory.
- ~/.ssh/config in node's home points each Host at the
public socket via IdentityAgent.
Why an in-container agent (not bind-mounted from host):
Docker Desktop on macOS does not forward Unix-domain socket
connect() across the VM boundary — connect() returns
ENOTSUP. Running ssh-agent inside the container sidesteps
that entirely.
Limitation: keys must be passphrase-less. ssh-add prompts on
/dev/tty for passphrases, but our docker exec has no TTY."""
assert isinstance(plan, DockerBottlePlan)
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
if not bottle.ssh:
return
container = target
proxy_host_port = pipelock_proxy_host_port(plan.slug)
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
container_ssh = f"{container_home}/.ssh"
agent_socket = "/run/claude-bottle-agent.sock"
public_socket = "/run/claude-bottle-agent-public.sock"
keys_dir = "/root/.claude-bottle-keys"
# ~/.ssh for node (700, owned by node).
docker_mod.docker_exec_root(container, ["mkdir", "-p", container_ssh])
docker_mod.docker_exec_root(container, ["chown", "node:node", container_ssh])
docker_mod.docker_exec_root(container, ["chmod", "700", container_ssh])
# /root/.claude-bottle-keys for root (700, root-owned).
docker_mod.docker_exec_root(container, ["mkdir", "-p", keys_dir])
docker_mod.docker_exec_root(container, ["chown", "root:root", keys_dir])
docker_mod.docker_exec_root(container, ["chmod", "700", keys_dir])
config_file = plan.stage_dir / "ssh_config"
known_hosts_file = plan.stage_dir / "ssh_known_hosts"
config_file.write_text("")
config_file.chmod(0o600)
known_hosts_file.write_text("")
known_hosts_file.chmod(0o600)
proxy_host, _, proxy_port = proxy_host_port.partition(":")
container_key_paths: list[str] = []
for entry in bottle.ssh:
name = entry.Host
key = expand_tilde(entry.IdentityFile)
hostname = entry.Hostname
user = entry.User
port = entry.Port
known_host_key = entry.KnownHostKey
key_basename = os.path.basename(key)
container_key_path = f"{keys_dir}/{key_basename}"
info(f"copying ssh key for '{name}' -> {container} (root-only staging)")
subprocess.run(
["docker", "cp", key, f"{container}:{container_key_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
docker_mod.docker_exec_root(container, ["chown", "root:root", container_key_path])
docker_mod.docker_exec_root(container, ["chmod", "600", container_key_path])
container_key_paths.append(container_key_path)
# ProxyCommand tunnels SSH through pipelock via HTTP
# CONNECT. %h / %p expand to this block's HostName /
# Port. socat's PROXY: mode does CONNECT host:port to
# the proxy.
block = (
f"Host {name}\n"
f" HostName {hostname}\n"
f" User {user}\n"
f" Port {port}\n"
f" IdentityAgent {public_socket}\n"
f" ProxyCommand socat - PROXY:{proxy_host}:%h:%p,proxyport={proxy_port}\n"
f"\n"
)
with config_file.open("a") as f:
f.write(block)
if known_host_key:
entries_to_write: list[str] = []
if port == "22":
entries_to_write.append(f"{name} {known_host_key}\n")
if hostname != name:
entries_to_write.append(f"{hostname} {known_host_key}\n")
else:
entries_to_write.append(f"[{name}]:{port} {known_host_key}\n")
if hostname != name:
entries_to_write.append(f"[{hostname}]:{port} {known_host_key}\n")
with known_hosts_file.open("a") as f:
for e in entries_to_write:
f.write(e)
# Boot the agent, load each key, delete the key files, then
# start the root-owned socat forwarder. One docker exec so the
# whole sequence is atomic.
info(f"starting in-container ssh-agent at {agent_socket} (forwarded via {public_socket})")
setup_lines = [
"set -eu",
f"ssh-agent -a {agent_socket} >/dev/null",
]
for kp in container_key_paths:
setup_lines.append(f"SSH_AUTH_SOCK={agent_socket} ssh-add {kp}")
setup_lines.append(f"rm -f {kp}")
setup_lines.append(f"rmdir {keys_dir} 2>/dev/null || true")
# Forwarder: socat (uid 0) connects to the agent on node's behalf.
setup_lines.append(
f"nohup socat UNIX-LISTEN:{public_socket},fork,reuseaddr,mode=666 "
f"UNIX-CONNECT:{agent_socket} </dev/null >/dev/null 2>&1 &"
)
# Wait briefly for the forwarder to bind.
setup_lines.extend([
"i=0",
"while [ $i -lt 20 ]; do",
f" [ -S {public_socket} ] && break",
" i=$((i + 1))",
" sleep 0.1",
"done",
f"[ -S {public_socket} ] || {{ echo 'claude-bottle: socat forwarder failed to bind {public_socket}' >&2; exit 1; }}",
])
setup_script = "\n".join(setup_lines) + "\n"
subprocess.run(
["docker", "exec", "-u", "0", container, "sh", "-c", setup_script],
check=True,
)
info(f"writing {container_ssh}/config")
subprocess.run(
["docker", "cp", str(config_file), f"{container}:{container_ssh}/config"],
stdout=subprocess.DEVNULL,
check=True,
)
docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/config"])
docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/config"])
if known_hosts_file.stat().st_size > 0:
info(f"writing {container_ssh}/known_hosts")
subprocess.run(
["docker", "cp", str(known_hosts_file), f"{container}:{container_ssh}/known_hosts"],
stdout=subprocess.DEVNULL,
check=True,
)
docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/known_hosts"])
docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/known_hosts"])
_ssh.provision_ssh(plan, target)
def provision_git(self, plan: BottlePlan, target: str) -> None:
"""If --cwd was set and the host cwd has a .git directory, copy
it into /home/node/workspace/.git and fix ownership. No-op
otherwise."""
assert isinstance(plan, DockerBottlePlan)
if not (plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir()):
return
container = target
info(f"copying {plan.spec.user_cwd}/.git -> {container}:/home/node/workspace/.git")
subprocess.run(
["docker", "cp", f"{plan.spec.user_cwd}/.git", f"{container}:/home/node/workspace/.git"],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
[
"docker", "exec", "-u", "0", container,
"chown", "-R", "node:node", "/home/node/workspace/.git",
],
stdout=subprocess.DEVNULL,
check=True,
)
_git.provision_git(plan, target)
# --- Cleanup ---
@@ -0,0 +1,8 @@
"""Per-provisioner modules for the Docker backend.
Each module exports one top-level function:
provision_<thing>(plan: DockerBottlePlan, target: str) -> ...
`DockerBottleBackend.provision_*` methods delegate to these. The
abstract `BottleBackend.provision_*` surface is unchanged; this
subpackage exists only to keep `backend.py` from being a god-file."""
@@ -0,0 +1,36 @@
"""Copy the host cwd's .git directory into a running Docker bottle.
Only fires when `--cwd` was passed AND the host cwd actually has a
.git. The container-side path is fixed at /home/node/workspace/.git;
ownership is reset to node so the agent can run git commands."""
from __future__ import annotations
import subprocess
from pathlib import Path
from ....log import info
from ..bottle_plan import DockerBottlePlan
def provision_git(plan: DockerBottlePlan, target: str) -> None:
"""If --cwd was set and the host cwd has a .git directory, copy
it into /home/node/workspace/.git and fix ownership. No-op
otherwise."""
if not (plan.spec.copy_cwd and Path(plan.spec.user_cwd, ".git").is_dir()):
return
container = target
info(f"copying {plan.spec.user_cwd}/.git -> {container}:/home/node/workspace/.git")
subprocess.run(
["docker", "cp", f"{plan.spec.user_cwd}/.git", f"{container}:/home/node/workspace/.git"],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
[
"docker", "exec", "-u", "0", container,
"chown", "-R", "node:node", "/home/node/workspace/.git",
],
stdout=subprocess.DEVNULL,
check=True,
)
@@ -0,0 +1,43 @@
"""Copy the agent prompt into a running Docker bottle.
The prompt file is always copied (so the in-container path always
exists) but `--append-system-prompt-file` only fires when the agent
actually has a prompt — the return value signals which case."""
from __future__ import annotations
import os
import subprocess
from ..bottle_plan import DockerBottlePlan
def provision_prompt(plan: DockerBottlePlan, target: str) -> str | None:
"""Copy the prompt file into the container, fix ownership/mode.
Returns the in-container path if the agent has a non-empty
prompt (drives --append-system-prompt-file), else None. The
file is copied either way so the path always exists."""
container = target
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
in_container_prompt_path = f"{container_home}/.claude-bottle-prompt.txt"
subprocess.run(
["docker", "cp", str(plan.prompt_file), f"{container}:{in_container_prompt_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
# `docker cp` preserves host UID; re-own/mode as root so node
# can read its own mode-600 prompt regardless of host UID.
subprocess.run(
["docker", "exec", "-u", "0", container, "chown", "node:node", in_container_prompt_path],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", "-u", "0", container, "chmod", "600", in_container_prompt_path],
stdout=subprocess.DEVNULL,
check=True,
)
agent = plan.spec.manifest.agents[plan.spec.agent_name]
return in_container_prompt_path if agent.prompt else None
@@ -0,0 +1,62 @@
"""Copy host-side skill directories into a running Docker bottle.
Skills are validated on the host before launch by
`DockerBottleBackend.validate_skills`; this module assumes that
validation has already run. A skill disappearing between validation
and copy still dies loudly rather than silently producing a partial
container."""
from __future__ import annotations
import os
import subprocess
from ....log import die, info
from ...util import host_skill_dir
from ..bottle_plan import DockerBottlePlan
def provision_skills(plan: DockerBottlePlan, target: str) -> None:
"""Copy each of the agent's named skills from the host's
~/.claude/skills/<name>/ into the container's equivalent path.
For each skill: ensure parent dir, wipe any prior copy, then
`docker cp <host>/. <container>:<dst>/` so the contents are
copied into a freshly-created destination dir. No-op when the
agent has no skills."""
agent = plan.spec.manifest.agents[plan.spec.agent_name]
if not agent.skills:
return
container = target
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
skills_dir = os.environ.get(
"CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR", f"{container_home}/.claude/skills"
)
subprocess.run(
["docker", "exec", container, "mkdir", "-p", skills_dir],
stdout=subprocess.DEVNULL,
check=True,
)
for n in agent.skills:
src = host_skill_dir(n)
if not os.path.isdir(src):
die(f"skill '{n}' disappeared from host between validation and copy at {src}.")
dst = f"{skills_dir}/{n}"
info(f"copying skill {n} into {container}:{dst}")
subprocess.run(
["docker", "exec", container, "rm", "-rf", dst],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "exec", container, "mkdir", "-p", dst],
stdout=subprocess.DEVNULL,
check=True,
)
subprocess.run(
["docker", "cp", f"{src}/.", f"{container}:{dst}/"],
stdout=subprocess.DEVNULL,
check=True,
)
@@ -0,0 +1,193 @@
"""Set up SSH inside a running Docker bottle.
This is the most involved provisioner. The end state in the container:
- ~/.ssh/config + ~/.ssh/known_hosts owned by node, mode 600
- ssh-agent running as root with each key loaded; agent socket at
/run/claude-bottle-agent.sock
- socat forwarder (also root) bridging the agent socket to
/run/claude-bottle-agent-public.sock (mode 666) so node can talk
to the agent despite ssh-agent's SO_PEERCRED UID match
- on-disk key files deleted after `ssh-add`; the bytes only live in
the agent process's memory thereafter
See the `provision_ssh` docstring for the full isolation rationale."""
from __future__ import annotations
import os
import subprocess
from ....log import info
from ....util import expand_tilde
from .. import util as docker_mod
from ..bottle_plan import DockerBottlePlan
from ..pipelock import pipelock_proxy_host_port
def provision_ssh(plan: DockerBottlePlan, target: str) -> None:
"""Set up SSH in the container so node can authenticate using
each entry's key without the key file being readable by node.
No-op when the bottle has no SSH entries.
Isolation strategy:
- Keys live at /root/.claude-bottle-keys/ (mode 700,
root-owned). /root is mode 700 in node:22-slim, so node
(uid 1000) can't even traverse in.
- ssh-agent runs as root, listening on
/run/claude-bottle-agent.sock. Each key is loaded with
ssh-add, then deleted; the bytes now live only in the
agent process's memory.
- ssh-agent's SO_PEERCRED-based UID match rejects every
connection whose peer euid is neither 0 nor the agent's.
To bridge that, a root-owned socat forwarder listens on
/run/claude-bottle-agent-public.sock (mode 666) and
proxies bytes to the real agent socket.
- node can't ptrace root-owned agent or socat, so
/proc/<pid>/mem is off-limits and key bytes never leave
root-owned memory.
- ~/.ssh/config in node's home points each Host at the
public socket via IdentityAgent.
Why an in-container agent (not bind-mounted from host):
Docker Desktop on macOS does not forward Unix-domain socket
connect() across the VM boundary — connect() returns
ENOTSUP. Running ssh-agent inside the container sidesteps
that entirely.
Limitation: keys must be passphrase-less. ssh-add prompts on
/dev/tty for passphrases, but our docker exec has no TTY."""
bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
if not bottle.ssh:
return
container = target
proxy_host_port = pipelock_proxy_host_port(plan.slug)
container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
container_ssh = f"{container_home}/.ssh"
agent_socket = "/run/claude-bottle-agent.sock"
public_socket = "/run/claude-bottle-agent-public.sock"
keys_dir = "/root/.claude-bottle-keys"
# ~/.ssh for node (700, owned by node).
docker_mod.docker_exec_root(container, ["mkdir", "-p", container_ssh])
docker_mod.docker_exec_root(container, ["chown", "node:node", container_ssh])
docker_mod.docker_exec_root(container, ["chmod", "700", container_ssh])
# /root/.claude-bottle-keys for root (700, root-owned).
docker_mod.docker_exec_root(container, ["mkdir", "-p", keys_dir])
docker_mod.docker_exec_root(container, ["chown", "root:root", keys_dir])
docker_mod.docker_exec_root(container, ["chmod", "700", keys_dir])
config_file = plan.stage_dir / "ssh_config"
known_hosts_file = plan.stage_dir / "ssh_known_hosts"
config_file.write_text("")
config_file.chmod(0o600)
known_hosts_file.write_text("")
known_hosts_file.chmod(0o600)
proxy_host, _, proxy_port = proxy_host_port.partition(":")
container_key_paths: list[str] = []
for entry in bottle.ssh:
name = entry.Host
key = expand_tilde(entry.IdentityFile)
hostname = entry.Hostname
user = entry.User
port = entry.Port
known_host_key = entry.KnownHostKey
key_basename = os.path.basename(key)
container_key_path = f"{keys_dir}/{key_basename}"
info(f"copying ssh key for '{name}' -> {container} (root-only staging)")
subprocess.run(
["docker", "cp", key, f"{container}:{container_key_path}"],
stdout=subprocess.DEVNULL,
check=True,
)
docker_mod.docker_exec_root(container, ["chown", "root:root", container_key_path])
docker_mod.docker_exec_root(container, ["chmod", "600", container_key_path])
container_key_paths.append(container_key_path)
# ProxyCommand tunnels SSH through pipelock via HTTP
# CONNECT. %h / %p expand to this block's HostName /
# Port. socat's PROXY: mode does CONNECT host:port to
# the proxy.
block = (
f"Host {name}\n"
f" HostName {hostname}\n"
f" User {user}\n"
f" Port {port}\n"
f" IdentityAgent {public_socket}\n"
f" ProxyCommand socat - PROXY:{proxy_host}:%h:%p,proxyport={proxy_port}\n"
f"\n"
)
with config_file.open("a") as f:
f.write(block)
if known_host_key:
entries_to_write: list[str] = []
if port == "22":
entries_to_write.append(f"{name} {known_host_key}\n")
if hostname != name:
entries_to_write.append(f"{hostname} {known_host_key}\n")
else:
entries_to_write.append(f"[{name}]:{port} {known_host_key}\n")
if hostname != name:
entries_to_write.append(f"[{hostname}]:{port} {known_host_key}\n")
with known_hosts_file.open("a") as f:
for e in entries_to_write:
f.write(e)
# Boot the agent, load each key, delete the key files, then
# start the root-owned socat forwarder. One docker exec so the
# whole sequence is atomic.
info(f"starting in-container ssh-agent at {agent_socket} (forwarded via {public_socket})")
setup_lines = [
"set -eu",
f"ssh-agent -a {agent_socket} >/dev/null",
]
for kp in container_key_paths:
setup_lines.append(f"SSH_AUTH_SOCK={agent_socket} ssh-add {kp}")
setup_lines.append(f"rm -f {kp}")
setup_lines.append(f"rmdir {keys_dir} 2>/dev/null || true")
# Forwarder: socat (uid 0) connects to the agent on node's behalf.
setup_lines.append(
f"nohup socat UNIX-LISTEN:{public_socket},fork,reuseaddr,mode=666 "
f"UNIX-CONNECT:{agent_socket} </dev/null >/dev/null 2>&1 &"
)
# Wait briefly for the forwarder to bind.
setup_lines.extend([
"i=0",
"while [ $i -lt 20 ]; do",
f" [ -S {public_socket} ] && break",
" i=$((i + 1))",
" sleep 0.1",
"done",
f"[ -S {public_socket} ] || {{ echo 'claude-bottle: socat forwarder failed to bind {public_socket}' >&2; exit 1; }}",
])
setup_script = "\n".join(setup_lines) + "\n"
subprocess.run(
["docker", "exec", "-u", "0", container, "sh", "-c", setup_script],
check=True,
)
info(f"writing {container_ssh}/config")
subprocess.run(
["docker", "cp", str(config_file), f"{container}:{container_ssh}/config"],
stdout=subprocess.DEVNULL,
check=True,
)
docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/config"])
docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/config"])
if known_hosts_file.stat().st_size > 0:
info(f"writing {container_ssh}/known_hosts")
subprocess.run(
["docker", "cp", str(known_hosts_file), f"{container}:{container_ssh}/known_hosts"],
stdout=subprocess.DEVNULL,
check=True,
)
docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/known_hosts"])
docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/known_hosts"])
+226
View File
@@ -0,0 +1,226 @@
# PRD 0004: Split out provisioners
- **Status:** Draft
- **Author:** didericis
- **Created:** 2026-05-11
## Summary
Break `claude_bottle/backend/docker/backend.py` (664 lines) apart by
moving the four provisioner methods — `provision_prompt`,
`provision_skills`, `provision_ssh`, `provision_git` — out of
`DockerBottleBackend` into their own modules under
`claude_bottle/backend/docker/provision/`. The abstract base in
`claude_bottle/backend/__init__.py` keeps the same four-method
contract; only the Docker implementation changes shape.
## Problem
`DockerBottleBackend` is doing too much in one file. After PRD 0003
landed, the class owns:
- `prepare` — name resolution, validation, scratch file writes
- `launch` — image build, network creation, sidecar lifecycle,
`docker run`, teardown
- `_run_agent_container` — argv assembly + name-conflict retry
- `provision_prompt` / `provision_skills` / `provision_ssh` /
`provision_git` — four host→container copy paths
- `prepare_cleanup` / `cleanup` / `list_active` — orphan handling
The provisioners are the largest single chunk. `provision_ssh` alone
is ~150 lines because it sets up a root-staged keyring, an in-container
`ssh-agent`, and a `socat` forwarder so node (uid 1000) can talk to a
root-owned agent socket without ptrace access. That logic is
self-contained — it touches the container via `docker exec` and
`docker cp` and reads from `BottlePlan` — but it sits in the same file
as image build and cleanup, which makes the file hard to scan and
invites unrelated changes to land in the same diff.
The provisioners are also the most likely place for new backends to
diverge. A future fly.io backend would not run `ssh-agent` in a
sidecar this way; an Apple `container` backend might. Pulling each
provisioner into its own module makes the per-backend variation a
file boundary, not a method boundary inside a god-class.
## Goals / Success Criteria
The feature works when all of the following are observable:
- `cli.py start` produces a byte-identical container topology, env,
skills layout, SSH config, and `.git` copy as before the split. No
user-visible behavior change.
- `DockerBottleBackend` in `backend.py` is under ~350 lines, with the
four provisioner methods reduced to thin dispatchers that delegate
to the per-provisioner modules.
- The full test suite passes unchanged (unit + integration + canary).
The feature is **done** when all of the following ship:
- A new `claude_bottle/backend/docker/provision/` subpackage exists
with one module per provisioner: `prompt.py`, `skills.py`, `ssh.py`,
`git.py`. Each exports a single top-level function taking
`(plan: DockerBottlePlan, target: str)` and returning the same type
the current method returns (`str | None` for prompt, `None` for the
others).
- `DockerBottleBackend.provision_prompt` / `provision_skills` /
`provision_ssh` / `provision_git` each become one-line delegations
to the new module functions.
- The abstract `BottleBackend.provision_*` signatures in
`claude_bottle/backend/__init__.py` are unchanged. The
`BottleBackend.provision` orchestration in the base class is
unchanged.
- No top-level CLI code or other backend gains a direct import of the
provisioner modules — the only call site is
`DockerBottleBackend.provision_*`.
## Non-goals
- No change to *what* the provisioners do. The SSH provisioning's
root-keyring + ssh-agent + socat-bridge design stays exactly as it
is. The skills `docker cp <src>/. <dst>/` pattern stays. The
`.git` copy stays gated on `spec.copy_cwd` + cwd having a `.git`.
- No replacement of `launch`'s ad-hoc `state: dict[str, str]`
teardown with `contextlib.ExitStack`. That cleanup is worthwhile
but is a separate change.
- No deduplication of the two name-conflict retry loops (one in
`prepare`, one in `_run_agent_container`).
- No removal of the `os.environ["CLAUDE_CODE_OAUTH_TOKEN"]` mutation
in `_run_agent_container`. That's a parent-process side effect
worth fixing, but it's outside the provisioner split.
- No new abstract base class for provisioners (no `Provisioner` ABC).
The four functions stay module-level; the abstract surface is the
four methods on `BottleBackend`. Introducing a `Provisioner` type
would be premature with one backend.
- No change to the `BottleBackend.provision_*` method names or
signatures. Callers continue to invoke them on the backend
instance.
## Scope
### In scope
- New `claude_bottle/backend/docker/provision/` subpackage with
`__init__.py`, `prompt.py`, `skills.py`, `ssh.py`, `git.py`.
- Moving the four method bodies out of
`DockerBottleBackend` into the new modules verbatim, adjusting only
what's needed to make them free functions: `self` becomes implicit
via the `plan` argument; private helpers move with their primary
caller; imports of `docker_mod`, `network_mod`, `pipelock`,
`expand_tilde`, etc. follow them.
- Reducing the `provision_*` methods on `DockerBottleBackend` to
one-line delegations.
- Updating any tests that monkeypatch
`DockerBottleBackend.provision_*` to monkeypatch the new module
functions instead (if any do — most existing tests don't reach
into provisioning).
### Out of scope
- `prepare`, `launch`, `_run_agent_container`, `prepare_cleanup`,
`cleanup`, `list_active`. These stay in `backend.py`.
- The `validate_skills` and `validate_ssh_entries` host-side
validation methods. They run from `prepare` (before the y/N), not
from `provision`, so they belong with `prepare` and stay on the
class.
- Any change to the abstract `BottleBackend` base in
`backend/__init__.py`.
- Cross-backend reuse of provisioner code. There's no second backend
yet; designing for one before it exists would be premature.
## Proposed Design
### New layout
```
claude_bottle/backend/docker/
backend.py # DockerBottleBackend (slimmer)
bottle.py
bottle_plan.py
bottle_cleanup_plan.py
network.py
pipelock.py
util.py
provision/
__init__.py # empty; explicit imports per module
prompt.py # provision_prompt(plan, target) -> str | None
skills.py # provision_skills(plan, target) -> None
ssh.py # provision_ssh(plan, target) -> None
git.py # provision_git(plan, target) -> None
```
### Function signatures
Each module exports one top-level function with the same shape:
```python
# prompt.py
def provision_prompt(plan: DockerBottlePlan, target: str) -> str | None: ...
# skills.py
def provision_skills(plan: DockerBottlePlan, target: str) -> None: ...
# ssh.py
def provision_ssh(plan: DockerBottlePlan, target: str) -> None: ...
# git.py
def provision_git(plan: DockerBottlePlan, target: str) -> None: ...
```
`target` is the resolved container name (same value the current
methods receive). The functions are free functions, not methods, so
they don't accept `self`.
### Delegation on the backend
`DockerBottleBackend.provision_*` shrinks to:
```python
from .provision import prompt as _prompt
from .provision import skills as _skills
from .provision import ssh as _ssh
from .provision import git as _git
class DockerBottleBackend(BottleBackend):
...
def provision_prompt(self, plan: BottlePlan, target: str) -> str | None:
assert isinstance(plan, DockerBottlePlan)
return _prompt.provision_prompt(plan, target)
def provision_skills(self, plan: BottlePlan, target: str) -> None:
assert isinstance(plan, DockerBottlePlan)
_skills.provision_skills(plan, target)
# ...same for ssh, git
```
The `isinstance` assert stays on the method (the abstract base passes
`BottlePlan`, not `DockerBottlePlan`) so the module functions can
take the concrete type and skip re-checking.
### Existing code touched
- **`claude_bottle/backend/docker/backend.py`** — four method
bodies move out; method definitions stay as one-line delegations.
Imports for `pipelock_proxy_host_port`, `expand_tilde`, etc., that
are only used by the moved bodies migrate with them.
- **`claude_bottle/backend/docker/__init__.py`** — no change. The
public surface (`DockerBottleBackend`) is unchanged.
- **`claude_bottle/backend/__init__.py`** — no change.
- **`tests/`** — no expected change. Existing tests exercise the
backend via `DockerBottleBackend` or the CLI surface; they don't
reach into provisioners directly. Verify after the move and only
update if a test breaks.
### Data model changes
None.
### External dependencies
None new.
## References
- PRD 0003 (`docs/prds/0003-bottle-backend-abstraction.md`) —
establishes the four-method provisioner contract being preserved
here.