PRD 0004: Split out provisioners #7
@@ -32,9 +32,12 @@ from .bottle_cleanup_plan import DockerBottleCleanupPlan
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .pipelock import (
|
||||
DockerPipelockProxy,
|
||||
pipelock_proxy_host_port,
|
||||
pipelock_proxy_url,
|
||||
)
|
||||
from .provision import git as _git
|
||||
from .provision import prompt as _prompt
|
||||
from .provision import skills as _skills
|
||||
from .provision import ssh as _ssh
|
||||
|
||||
|
||||
# Where the repo root lives, for `docker build` context. Computed once.
|
||||
@@ -282,35 +285,8 @@ class DockerBottleBackend(BottleBackend):
|
||||
info(f"name conflict; retrying as {container}")
|
||||
|
||||
def provision_prompt(self, plan: BottlePlan, target: str) -> str | None:
    """Copy the agent prompt into the container named by `target`.

    Thin dispatcher: the Docker-specific work is done by
    `_prompt.provision_prompt`, and its return value is passed through
    unchanged (the in-container prompt path when the agent has a
    non-empty prompt, else None).

    The copy logic used to be inlined here ahead of the delegation
    call, which left the delegation unreachable; only the delegation
    remains so there is a single implementation."""
    # Narrow the abstract plan to the Docker plan the helper expects.
    assert isinstance(plan, DockerBottlePlan)
    return _prompt.provision_prompt(plan, target)
|
||||
|
||||
def validate_skills(self, skills: list[str]) -> None:
|
||||
"""Fail loudly if any named skill is missing from the host's
|
||||
@@ -326,50 +302,8 @@ class DockerBottleBackend(BottleBackend):
|
||||
)
|
||||
|
||||
def provision_skills(self, plan: BottlePlan, target: str) -> None:
    """Copy the agent's named skills into the container named by `target`.

    Thin dispatcher around `_skills.provision_skills`, which is a
    no-op when the agent has no skills.

    The copy loop used to be inlined here *and* followed by the
    delegation call, so every skill was wiped and copied twice; only
    the delegation remains."""
    # Narrow the abstract plan to the Docker plan the helper expects.
    assert isinstance(plan, DockerBottlePlan)
    _skills.provision_skills(plan, target)
|
||||
|
||||
def validate_ssh_entries(self, entries: Sequence[SshEntry]) -> None:
|
||||
"""Each entry's IdentityFile must exist on the host (after
|
||||
@@ -383,196 +317,12 @@ class DockerBottleBackend(BottleBackend):
|
||||
die(f"ssh key file not found for host '{entry.Host}': {key}")
|
||||
|
||||
def provision_ssh(self, plan: BottlePlan, target: str) -> None:
    """Set up SSH inside the container named by `target`.

    Thin dispatcher around `_ssh.provision_ssh`, which is a no-op
    when the bottle has no SSH entries.

    The agent/forwarder setup used to be inlined here *and* followed
    by the delegation call, which would have run the whole sequence a
    second time (including starting another ssh-agent on the same
    socket path); only the delegation remains."""
    # Narrow the abstract plan to the Docker plan the helper expects.
    assert isinstance(plan, DockerBottlePlan)
    _ssh.provision_ssh(plan, target)
|
||||
|
||||
def provision_git(self, plan: BottlePlan, target: str) -> None:
    """Copy the host cwd's .git into the container named by `target`.

    Thin dispatcher around `_git.provision_git`, which is a no-op
    unless the spec requested a cwd copy and that cwd has a .git
    directory.

    The copy used to be inlined here *and* followed by the delegation
    call, so the .git tree was copied and re-owned twice; only the
    delegation remains."""
    # Narrow the abstract plan to the Docker plan the helper expects.
    assert isinstance(plan, DockerBottlePlan)
    _git.provision_git(plan, target)
|
||||
|
||||
# --- Cleanup ---
|
||||
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
"""Per-provisioner modules for the Docker backend.
|
||||
|
||||
Each module exports one top-level function:
|
||||
provision_<thing>(plan: DockerBottlePlan, target: str) -> ...
|
||||
|
||||
`DockerBottleBackend.provision_*` methods delegate to these. The
|
||||
abstract `BottleBackend.provision_*` surface is unchanged; this
|
||||
subpackage exists only to keep `backend.py` from being a god-file."""
|
||||
@@ -0,0 +1,36 @@
|
||||
"""Copy the host cwd's .git directory into a running Docker bottle.
|
||||
|
||||
Only fires when `--cwd` was passed AND the host cwd actually has a
|
||||
.git. The container-side path is fixed at /home/node/workspace/.git;
|
||||
ownership is reset to node so the agent can run git commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ....log import info
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
def provision_git(plan: DockerBottlePlan, target: str) -> None:
    """Mirror the host cwd's .git directory into the bottle.

    Nothing happens unless the spec asked for the cwd to be copied
    and that cwd contains a .git directory. When both hold, the
    directory is copied to /home/node/workspace/.git inside the
    container `target` and then recursively chowned to node:node by
    a root `docker exec`."""
    spec = plan.spec
    if not spec.copy_cwd:
        return
    if not Path(spec.user_cwd, ".git").is_dir():
        return

    host_git = f"{spec.user_cwd}/.git"
    container_git = "/home/node/workspace/.git"
    info(f"copying {host_git} -> {target}:{container_git}")

    copy_cmd = ["docker", "cp", host_git, f"{target}:{container_git}"]
    chown_cmd = [
        "docker", "exec", "-u", "0", target,
        "chown", "-R", "node:node", container_git,
    ]
    # Copy first, then fix ownership as root inside the container.
    for cmd in (copy_cmd, chown_cmd):
        subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True)
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Copy the agent prompt into a running Docker bottle.
|
||||
|
||||
The prompt file is always copied (so the in-container path always
|
||||
exists) but `--append-system-prompt-file` only fires when the agent
|
||||
actually has a prompt — the return value signals which case."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
def provision_prompt(plan: DockerBottlePlan, target: str) -> str | None:
    """Install the agent's prompt file inside the container `target`.

    The file at `plan.prompt_file` is always copied to
    `<container home>/.claude-bottle-prompt.txt`, where the container
    home comes from CLAUDE_BOTTLE_CONTAINER_HOME (default /home/node).

    Returns the in-container path when the agent's prompt is
    non-empty (the caller uses it for --append-system-prompt-file),
    otherwise None."""
    home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
    dest = f"{home}/.claude-bottle-prompt.txt"

    subprocess.run(
        ["docker", "cp", str(plan.prompt_file), f"{target}:{dest}"],
        stdout=subprocess.DEVNULL,
        check=True,
    )

    # `docker cp` keeps the host UID on the file, so fix owner and
    # mode as root; node must be able to read its own mode-600 prompt
    # whatever the host UID was.
    for fixup in (("chown", "node:node"), ("chmod", "600")):
        subprocess.run(
            ["docker", "exec", "-u", "0", target, *fixup, dest],
            stdout=subprocess.DEVNULL,
            check=True,
        )

    has_prompt = bool(plan.spec.manifest.agents[plan.spec.agent_name].prompt)
    if has_prompt:
        return dest
    return None
|
||||
@@ -0,0 +1,62 @@
|
||||
"""Copy host-side skill directories into a running Docker bottle.
|
||||
|
||||
Skills are validated on the host before launch by
|
||||
`DockerBottleBackend.validate_skills`; this module assumes that
|
||||
validation has already run. A skill disappearing between validation
|
||||
and copy still dies loudly rather than silently producing a partial
|
||||
container."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from ....log import die, info
|
||||
from ...util import host_skill_dir
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
|
||||
|
||||
def provision_skills(plan: DockerBottlePlan, target: str) -> None:
    """Install the agent's skills inside the container `target`.

    Each named skill is taken from the host directory returned by
    `host_skill_dir(name)` and placed under the container skills
    directory (CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR, defaulting to
    `<CLAUDE_BOTTLE_CONTAINER_HOME or /home/node>/.claude/skills`).

    Per skill: remove any previous copy, recreate the destination
    directory, then `docker cp <src>/. <container>:<dst>/` so the
    contents land in a fresh directory. Returns immediately when the
    agent lists no skills."""
    wanted = plan.spec.manifest.agents[plan.spec.agent_name].skills
    if not wanted:
        return

    home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
    skills_root = os.environ.get(
        "CLAUDE_BOTTLE_CONTAINER_SKILLS_DIR", f"{home}/.claude/skills"
    )

    def run_quiet(argv: list[str]) -> None:
        # Every docker call here discards stdout and must succeed.
        subprocess.run(argv, stdout=subprocess.DEVNULL, check=True)

    in_container = ["docker", "exec", target]
    run_quiet([*in_container, "mkdir", "-p", skills_root])

    for skill in wanted:
        host_dir = host_skill_dir(skill)
        if not os.path.isdir(host_dir):
            # Validation ran earlier; the directory vanishing since
            # then is reported loudly rather than skipped.
            die(f"skill '{skill}' disappeared from host between validation and copy at {host_dir}.")
        dest = f"{skills_root}/{skill}"
        info(f"copying skill {skill} into {target}:{dest}")
        run_quiet([*in_container, "rm", "-rf", dest])
        run_quiet([*in_container, "mkdir", "-p", dest])
        run_quiet(["docker", "cp", f"{host_dir}/.", f"{target}:{dest}/"])
|
||||
@@ -0,0 +1,193 @@
|
||||
"""Set up SSH inside a running Docker bottle.
|
||||
|
||||
This is the most involved provisioner. The end state in the container:
|
||||
- ~/.ssh/config + ~/.ssh/known_hosts owned by node, mode 600
|
||||
- ssh-agent running as root with each key loaded; agent socket at
|
||||
/run/claude-bottle-agent.sock
|
||||
- socat forwarder (also root) bridging the agent socket to
|
||||
/run/claude-bottle-agent-public.sock (mode 666) so node can talk
|
||||
to the agent despite ssh-agent's SO_PEERCRED UID match
|
||||
- on-disk key files deleted after `ssh-add`; the bytes only live in
|
||||
the agent process's memory thereafter
|
||||
|
||||
See the `provision_ssh` docstring for the full isolation rationale."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from ....log import info
|
||||
from ....util import expand_tilde
|
||||
from .. import util as docker_mod
|
||||
from ..bottle_plan import DockerBottlePlan
|
||||
from ..pipelock import pipelock_proxy_host_port
|
||||
|
||||
|
||||
def provision_ssh(plan: DockerBottlePlan, target: str) -> None:
    """Set up SSH in the container so node can authenticate using
    each entry's key without the key file being readable by node.
    No-op when the bottle has no SSH entries.

    Isolation strategy:
      - Keys live at /root/.claude-bottle-keys/ (mode 700,
        root-owned). /root is mode 700 in node:22-slim, so node
        (uid 1000) can't even traverse in.
      - ssh-agent runs as root, listening on
        /run/claude-bottle-agent.sock. Each key is loaded with
        ssh-add, then deleted; the bytes now live only in the
        agent process's memory.
      - ssh-agent's SO_PEERCRED-based UID match rejects every
        connection whose peer euid is neither 0 nor the agent's.
        To bridge that, a root-owned socat forwarder listens on
        /run/claude-bottle-agent-public.sock (mode 666) and
        proxies bytes to the real agent socket.
      - node can't ptrace root-owned agent or socat, so
        /proc/<pid>/mem is off-limits and key bytes never leave
        root-owned memory.
      - ~/.ssh/config in node's home points each Host at the
        public socket via IdentityAgent.

    Why an in-container agent (not bind-mounted from host):
    Docker Desktop on macOS does not forward Unix-domain socket
    connect() across the VM boundary — connect() returns
    ENOTSUP. Running ssh-agent inside the container sidesteps
    that entirely.

    Key staging: each distinct host key file is copied and loaded
    exactly once, even when several entries share it, and two
    different host keys that happen to share a basename get distinct
    staging names. Staged paths are shell-quoted in the setup script.

    Limitation: keys must be passphrase-less. ssh-add prompts on
    /dev/tty for passphrases, but our docker exec has no TTY."""
    import shlex  # local import: only used to quote paths in the setup script

    bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
    if not bottle.ssh:
        return

    container = target
    proxy_host_port = pipelock_proxy_host_port(plan.slug)
    container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
    container_ssh = f"{container_home}/.ssh"
    agent_socket = "/run/claude-bottle-agent.sock"
    public_socket = "/run/claude-bottle-agent-public.sock"
    keys_dir = "/root/.claude-bottle-keys"

    # ~/.ssh for node (700, owned by node).
    docker_mod.docker_exec_root(container, ["mkdir", "-p", container_ssh])
    docker_mod.docker_exec_root(container, ["chown", "node:node", container_ssh])
    docker_mod.docker_exec_root(container, ["chmod", "700", container_ssh])

    # /root/.claude-bottle-keys for root (700, root-owned).
    docker_mod.docker_exec_root(container, ["mkdir", "-p", keys_dir])
    docker_mod.docker_exec_root(container, ["chown", "root:root", keys_dir])
    docker_mod.docker_exec_root(container, ["chmod", "700", keys_dir])

    # Host-side scratch copies; truncated up front so a reused stage
    # dir never leaks entries from a previous run.
    config_file = plan.stage_dir / "ssh_config"
    known_hosts_file = plan.stage_dir / "ssh_known_hosts"
    config_file.write_text("")
    config_file.chmod(0o600)
    known_hosts_file.write_text("")
    known_hosts_file.chmod(0o600)

    proxy_host, _, proxy_port = proxy_host_port.partition(":")

    # Host key path -> in-container staging path. Keyed by host path
    # so a key shared by several entries is staged and ssh-add'ed
    # once: the setup script deletes each file right after loading
    # it, so a second ssh-add of the same path would fail under
    # `set -eu` and abort provisioning.
    staged_keys: dict[str, str] = {}
    for entry in bottle.ssh:
        name = entry.Host
        key = expand_tilde(entry.IdentityFile)
        hostname = entry.Hostname
        user = entry.User
        port = entry.Port
        known_host_key = entry.KnownHostKey

        if key not in staged_keys:
            key_basename = os.path.basename(key)
            container_key_path = f"{keys_dir}/{key_basename}"
            if container_key_path in staged_keys.values():
                # A different host key with the same basename: give it
                # its own staging name instead of overwriting the first.
                container_key_path = f"{keys_dir}/{len(staged_keys)}-{key_basename}"

            info(f"copying ssh key for '{name}' -> {container} (root-only staging)")
            subprocess.run(
                ["docker", "cp", key, f"{container}:{container_key_path}"],
                stdout=subprocess.DEVNULL,
                check=True,
            )
            docker_mod.docker_exec_root(container, ["chown", "root:root", container_key_path])
            docker_mod.docker_exec_root(container, ["chmod", "600", container_key_path])

            staged_keys[key] = container_key_path

        # ProxyCommand tunnels SSH through pipelock via HTTP
        # CONNECT. %h / %p expand to this block's HostName /
        # Port. socat's PROXY: mode does CONNECT host:port to
        # the proxy.
        block = (
            f"Host {name}\n"
            f"    HostName {hostname}\n"
            f"    User {user}\n"
            f"    Port {port}\n"
            f"    IdentityAgent {public_socket}\n"
            f"    ProxyCommand socat - PROXY:{proxy_host}:%h:%p,proxyport={proxy_port}\n"
            f"\n"
        )
        with config_file.open("a") as f:
            f.write(block)

        if known_host_key:
            # known_hosts uses bare host names on port 22 and
            # "[host]:port" otherwise; pin both the alias and the
            # real hostname when they differ.
            entries_to_write: list[str] = []
            if port == "22":
                entries_to_write.append(f"{name} {known_host_key}\n")
                if hostname != name:
                    entries_to_write.append(f"{hostname} {known_host_key}\n")
            else:
                entries_to_write.append(f"[{name}]:{port} {known_host_key}\n")
                if hostname != name:
                    entries_to_write.append(f"[{hostname}]:{port} {known_host_key}\n")
            with known_hosts_file.open("a") as f:
                f.writelines(entries_to_write)

    # Boot the agent, load each key, delete the key files, then
    # start the root-owned socat forwarder. One docker exec so the
    # whole sequence is atomic.
    info(f"starting in-container ssh-agent at {agent_socket} (forwarded via {public_socket})")
    setup_lines = [
        "set -eu",
        f"ssh-agent -a {agent_socket} >/dev/null",
    ]
    for kp in staged_keys.values():
        # Quote: the basename comes from the user's manifest and may
        # contain spaces or shell metacharacters.
        quoted_kp = shlex.quote(kp)
        setup_lines.append(f"SSH_AUTH_SOCK={agent_socket} ssh-add {quoted_kp}")
        setup_lines.append(f"rm -f {quoted_kp}")
    setup_lines.append(f"rmdir {keys_dir} 2>/dev/null || true")
    # Forwarder: socat (uid 0) connects to the agent on node's behalf.
    setup_lines.append(
        f"nohup socat UNIX-LISTEN:{public_socket},fork,reuseaddr,mode=666 "
        f"UNIX-CONNECT:{agent_socket} </dev/null >/dev/null 2>&1 &"
    )
    # Wait briefly for the forwarder to bind.
    setup_lines.extend([
        "i=0",
        "while [ $i -lt 20 ]; do",
        f"  [ -S {public_socket} ] && break",
        "  i=$((i + 1))",
        "  sleep 0.1",
        "done",
        f"[ -S {public_socket} ] || {{ echo 'claude-bottle: socat forwarder failed to bind {public_socket}' >&2; exit 1; }}",
    ])
    setup_script = "\n".join(setup_lines) + "\n"
    subprocess.run(
        ["docker", "exec", "-u", "0", container, "sh", "-c", setup_script],
        check=True,
    )

    info(f"writing {container_ssh}/config")
    subprocess.run(
        ["docker", "cp", str(config_file), f"{container}:{container_ssh}/config"],
        stdout=subprocess.DEVNULL,
        check=True,
    )
    docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/config"])
    docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/config"])

    # Only ship known_hosts when at least one entry pinned a host key.
    if known_hosts_file.stat().st_size > 0:
        info(f"writing {container_ssh}/known_hosts")
        subprocess.run(
            ["docker", "cp", str(known_hosts_file), f"{container}:{container_ssh}/known_hosts"],
            stdout=subprocess.DEVNULL,
            check=True,
        )
        docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/known_hosts"])
        docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/known_hosts"])
|
||||
@@ -0,0 +1,226 @@
|
||||
# PRD 0004: Split out provisioners
|
||||
|
||||
- **Status:** Draft
|
||||
- **Author:** didericis
|
||||
- **Created:** 2026-05-11
|
||||
|
||||
## Summary
|
||||
|
||||
Break `claude_bottle/backend/docker/backend.py` (664 lines) apart by
|
||||
moving the four provisioner methods — `provision_prompt`,
|
||||
`provision_skills`, `provision_ssh`, `provision_git` — out of
|
||||
`DockerBottleBackend` into their own modules under
|
||||
`claude_bottle/backend/docker/provision/`. The abstract base in
|
||||
`claude_bottle/backend/__init__.py` keeps the same four-method
|
||||
contract; only the Docker implementation changes shape.
|
||||
|
||||
## Problem
|
||||
|
||||
`DockerBottleBackend` is doing too much in one file. After PRD 0003
|
||||
landed, the class owns:
|
||||
|
||||
- `prepare` — name resolution, validation, scratch file writes
|
||||
- `launch` — image build, network creation, sidecar lifecycle,
|
||||
`docker run`, teardown
|
||||
- `_run_agent_container` — argv assembly + name-conflict retry
|
||||
- `provision_prompt` / `provision_skills` / `provision_ssh` /
|
||||
`provision_git` — four host→container copy paths
|
||||
- `prepare_cleanup` / `cleanup` / `list_active` — orphan handling
|
||||
|
||||
The provisioners are the largest single chunk. `provision_ssh` alone
|
||||
is ~150 lines because it sets up a root-staged keyring, an in-container
|
||||
`ssh-agent`, and a `socat` forwarder so node (uid 1000) can talk to a
|
||||
root-owned agent socket without ptrace access. That logic is
|
||||
self-contained — it touches the container via `docker exec` and
|
||||
`docker cp` and reads from `BottlePlan` — but it sits in the same file
|
||||
as image build and cleanup, which makes the file hard to scan and
|
||||
invites unrelated changes to land in the same diff.
|
||||
|
||||
The provisioners are also the most likely place for new backends to
|
||||
diverge. A future fly.io backend would not run `ssh-agent` in a
|
||||
container this way; an Apple `container` backend might. Pulling each
|
||||
provisioner into its own module makes the per-backend variation a
|
||||
file boundary, not a method boundary inside a god-class.
|
||||
|
||||
## Goals / Success Criteria
|
||||
|
||||
The feature works when all of the following are observable:
|
||||
|
||||
- `cli.py start` produces a byte-identical container topology, env,
|
||||
skills layout, SSH config, and `.git` copy as before the split. No
|
||||
user-visible behavior change.
|
||||
- `DockerBottleBackend` in `backend.py` is under ~350 lines, with the
|
||||
four provisioner methods reduced to thin dispatchers that delegate
|
||||
to the per-provisioner modules.
|
||||
- The full test suite passes unchanged (unit + integration + canary).
|
||||
|
||||
The feature is **done** when all of the following ship:
|
||||
|
||||
- A new `claude_bottle/backend/docker/provision/` subpackage exists
|
||||
with one module per provisioner: `prompt.py`, `skills.py`, `ssh.py`,
|
||||
`git.py`. Each exports a single top-level function taking
|
||||
`(plan: DockerBottlePlan, target: str)` and returning the same type
|
||||
the current method returns (`str | None` for prompt, `None` for the
|
||||
others).
|
||||
- `DockerBottleBackend.provision_prompt` / `provision_skills` /
|
||||
`provision_ssh` / `provision_git` each become one-line delegations
|
||||
to the new module functions.
|
||||
- The abstract `BottleBackend.provision_*` signatures in
|
||||
`claude_bottle/backend/__init__.py` are unchanged. The
|
||||
`BottleBackend.provision` orchestration in the base class is
|
||||
unchanged.
|
||||
- No top-level CLI code or other backend gains a direct import of the
|
||||
provisioner modules — the only call site is
|
||||
`DockerBottleBackend.provision_*`.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- No change to *what* the provisioners do. The SSH provisioning's
|
||||
root-keyring + ssh-agent + socat-bridge design stays exactly as it
|
||||
is. The skills `docker cp <src>/. <dst>/` pattern stays. The
|
||||
`.git` copy stays gated on `spec.copy_cwd` + cwd having a `.git`.
|
||||
- No replacement of `launch`'s ad-hoc `state: dict[str, str]`
|
||||
teardown with `contextlib.ExitStack`. That cleanup is worthwhile
|
||||
but is a separate change.
|
||||
- No deduplication of the two name-conflict retry loops (one in
|
||||
`prepare`, one in `_run_agent_container`).
|
||||
- No removal of the `os.environ["CLAUDE_CODE_OAUTH_TOKEN"]` mutation
|
||||
in `_run_agent_container`. That's a parent-process side effect
|
||||
worth fixing, but it's outside the provisioner split.
|
||||
- No new abstract base class for provisioners (no `Provisioner` ABC).
|
||||
The four functions stay module-level; the abstract surface is the
|
||||
four methods on `BottleBackend`. Introducing a `Provisioner` type
|
||||
would be premature with one backend.
|
||||
- No change to the `BottleBackend.provision_*` method names or
|
||||
signatures. Callers continue to invoke them on the backend
|
||||
instance.
|
||||
|
||||
## Scope
|
||||
|
||||
### In scope
|
||||
|
||||
- New `claude_bottle/backend/docker/provision/` subpackage with
|
||||
`__init__.py`, `prompt.py`, `skills.py`, `ssh.py`, `git.py`.
|
||||
- Moving the four method bodies out of
|
||||
`DockerBottleBackend` into the new modules verbatim, adjusting only
|
||||
what's needed to make them free functions: the unused `self` parameter
is dropped (everything they read comes from `plan`); private helpers move with their primary
|
||||
caller; imports of `docker_mod`, `network_mod`, `pipelock`,
|
||||
`expand_tilde`, etc. follow them.
|
||||
- Reducing the `provision_*` methods on `DockerBottleBackend` to
|
||||
one-line delegations.
|
||||
- Updating any tests that monkeypatch
|
||||
`DockerBottleBackend.provision_*` to monkeypatch the new module
|
||||
functions instead (if any do — most existing tests don't reach
|
||||
into provisioning).
|
||||
|
||||
### Out of scope
|
||||
|
||||
- `prepare`, `launch`, `_run_agent_container`, `prepare_cleanup`,
|
||||
`cleanup`, `list_active`. These stay in `backend.py`.
|
||||
- The `validate_skills` and `validate_ssh_entries` host-side
|
||||
validation methods. They run from `prepare` (before the y/N), not
|
||||
from `provision`, so they belong with `prepare` and stay on the
|
||||
class.
|
||||
- Any change to the abstract `BottleBackend` base in
|
||||
`backend/__init__.py`.
|
||||
- Cross-backend reuse of provisioner code. There's no second backend
|
||||
yet; designing for one before it exists would be premature.
|
||||
|
||||
## Proposed Design
|
||||
|
||||
### New layout
|
||||
|
||||
```
|
||||
claude_bottle/backend/docker/
|
||||
backend.py # DockerBottleBackend (slimmer)
|
||||
bottle.py
|
||||
bottle_plan.py
|
||||
bottle_cleanup_plan.py
|
||||
network.py
|
||||
pipelock.py
|
||||
util.py
|
||||
provision/
|
||||
__init__.py # empty; explicit imports per module
|
||||
prompt.py # provision_prompt(plan, target) -> str | None
|
||||
skills.py # provision_skills(plan, target) -> None
|
||||
ssh.py # provision_ssh(plan, target) -> None
|
||||
git.py # provision_git(plan, target) -> None
|
||||
```
|
||||
|
||||
### Function signatures
|
||||
|
||||
Each module exports one top-level function taking the same `(plan, target)` parameters; only `provision_prompt` returns a value:
|
||||
|
||||
```python
|
||||
# prompt.py
|
||||
def provision_prompt(plan: DockerBottlePlan, target: str) -> str | None: ...
|
||||
|
||||
# skills.py
|
||||
def provision_skills(plan: DockerBottlePlan, target: str) -> None: ...
|
||||
|
||||
# ssh.py
|
||||
def provision_ssh(plan: DockerBottlePlan, target: str) -> None: ...
|
||||
|
||||
# git.py
|
||||
def provision_git(plan: DockerBottlePlan, target: str) -> None: ...
|
||||
```
|
||||
|
||||
`target` is the resolved container name (same value the current
|
||||
methods receive). The functions are free functions, not methods, so
|
||||
they don't accept `self`.
|
||||
|
||||
### Delegation on the backend
|
||||
|
||||
`DockerBottleBackend.provision_*` shrinks to:
|
||||
|
||||
```python
|
||||
from .provision import prompt as _prompt
|
||||
from .provision import skills as _skills
|
||||
from .provision import ssh as _ssh
|
||||
from .provision import git as _git
|
||||
|
||||
class DockerBottleBackend(BottleBackend):
|
||||
...
|
||||
def provision_prompt(self, plan: BottlePlan, target: str) -> str | None:
|
||||
assert isinstance(plan, DockerBottlePlan)
|
||||
return _prompt.provision_prompt(plan, target)
|
||||
|
||||
def provision_skills(self, plan: BottlePlan, target: str) -> None:
|
||||
assert isinstance(plan, DockerBottlePlan)
|
||||
_skills.provision_skills(plan, target)
|
||||
|
||||
# ...same for ssh, git
|
||||
```
|
||||
|
||||
The `isinstance` assert stays on the method (the abstract signature takes
|
||||
`BottlePlan`, not `DockerBottlePlan`) so the module functions can
|
||||
take the concrete type and skip re-checking.
|
||||
|
||||
### Existing code touched
|
||||
|
||||
- **`claude_bottle/backend/docker/backend.py`** — four method
|
||||
bodies move out; method definitions stay as thin delegations (assert plus one call).
|
||||
Imports for `pipelock_proxy_host_port`, `expand_tilde`, etc., that
|
||||
are only used by the moved bodies migrate with them.
|
||||
- **`claude_bottle/backend/docker/__init__.py`** — no change. The
|
||||
public surface (`DockerBottleBackend`) is unchanged.
|
||||
- **`claude_bottle/backend/__init__.py`** — no change.
|
||||
- **`tests/`** — no expected change. Existing tests exercise the
|
||||
backend via `DockerBottleBackend` or the CLI surface; they don't
|
||||
reach into provisioners directly. Verify after the move and only
|
||||
update if a test breaks.
|
||||
|
||||
### Data model changes
|
||||
|
||||
None.
|
||||
|
||||
### External dependencies
|
||||
|
||||
None new.
|
||||
|
||||
## References
|
||||
|
||||
- PRD 0003 (`docs/prds/0003-bottle-backend-abstraction.md`) —
|
||||
establishes the four-method provisioner contract being preserved
|
||||
here.
|
||||
Reference in New Issue
Block a user