feat(ssh-gate)!: remove ssh-gate sidecar and provisioner (PRD 0009)

Delete claude_bottle/ssh_gate.py, the DockerSSHGate sidecar,
and the provision_ssh provisioner (~/.ssh/config + ssh-agent
wiring). Unwire the gate from the abstract BottleBackend
(provision orchestration drops the ssh step,
_validate_ssh_entries goes away) and from the Docker backend
(prepare/launch lose the `gate` kwarg, bottle_plan drops the
gate_plan field, dry-run JSON drops the ssh_hosts / ssh_gate
keys, y/N preflight drops the ssh-hosts block). cli/info now
prints declared git remotes instead of ssh hosts. pipelock's
docstring picks up the git-gate framing now that there's no
PRD-0007 boundary to call out.

BREAKING (dry-run JSON): the `ssh_hosts` and `ssh_gate` keys
are gone from `start --dry-run --format=json`. Consumers should
read `git_remotes` / `git_gate` instead.
This commit is contained in:
2026-05-12 23:49:58 -04:00
parent c403d137b6
commit 3d66ad2a86
10 changed files with 23 additions and 595 deletions
-8
View File
@@ -29,8 +29,6 @@ from .provision import ca as _ca
from .provision import git as _git
from .provision import prompt as _prompt
from .provision import skills as _skills
from .provision import ssh as _ssh
from .ssh_gate import DockerSSHGate
class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanupPlan"]):
@@ -41,7 +39,6 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup
def __init__(self) -> None:
self._proxy = DockerPipelockProxy()
self._gate = DockerSSHGate()
self._git_gate = DockerGitGate()
def _resolve_plan(self, spec: BottleSpec, *, stage_dir: Path) -> DockerBottlePlan:
@@ -49,7 +46,6 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup
spec,
stage_dir=stage_dir,
proxy=self._proxy,
gate=self._gate,
git_gate=self._git_gate,
)
@@ -58,7 +54,6 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup
with _launch.launch(
plan,
proxy=self._proxy,
gate=self._gate,
git_gate=self._git_gate,
provision=self.provision,
) as bottle:
@@ -73,9 +68,6 @@ class DockerBottleBackend(BottleBackend["DockerBottlePlan", "DockerBottleCleanup
def provision_skills(self, plan: DockerBottlePlan, target: str) -> None:
_skills.provision_skills(plan, target)
def provision_ssh(self, plan: DockerBottlePlan, target: str) -> None:
_ssh.provision_ssh(plan, target)
def provision_git(self, plan: DockerBottlePlan, target: str) -> None:
_git.provision_git(plan, target)
@@ -15,7 +15,6 @@ from ...git_gate import GitGatePlan
from ...log import info
from ...manifest import Agent, Bottle
from ...pipelock import PipelockProxyPlan, pipelock_effective_allowlist
from ...ssh_gate import SSHGatePlan
from .. import BottlePlan
@@ -27,7 +26,6 @@ class _PlanView:
agent: Agent
bottle: Bottle
env_names: list[str]
ssh_hosts: list[str]
git_names: list[str]
prompt_first_line: str
@@ -52,7 +50,6 @@ class DockerBottlePlan(BottlePlan):
forwarded_env: dict[str, str] = field(repr=False)
prompt_file: Path
proxy_plan: PipelockProxyPlan
gate_plan: SSHGatePlan
git_gate_plan: GitGatePlan
allowlist_summary: str
use_runsc: bool
@@ -69,7 +66,6 @@ class DockerBottlePlan(BottlePlan):
agent=agent,
bottle=bottle,
env_names=env_names,
ssh_hosts=[e.Host for e in bottle.ssh],
git_names=[e.Name for e in bottle.git],
prompt_first_line=agent.prompt.splitlines()[0] if agent.prompt else "",
)
@@ -94,16 +90,6 @@ class DockerBottlePlan(BottlePlan):
info("skills : " + (" ".join(v.agent.skills) if v.agent.skills else "(none)"))
info(f"docker runtime : {runtime_label}")
info(f"bottle : {v.agent.bottle}")
if v.ssh_hosts:
info(f" ssh hosts : {', '.join(v.ssh_hosts)}")
gate_lines = [
f"{u.bottle_host_alias} -> {u.upstream_host}:{u.upstream_port} "
f"(listen {u.listen_port})"
for u in self.gate_plan.upstreams
]
info(f" ssh gate : {'; '.join(gate_lines)}")
else:
info(" ssh hosts : (none)")
if v.git_names:
info(f" git remotes : {', '.join(v.git_names)}")
git_lines = [
@@ -136,15 +122,6 @@ class DockerBottlePlan(BottlePlan):
"runtime": "runsc" if self.use_runsc else "runc",
"env_names": v.env_names,
"skills": list(v.agent.skills),
"ssh_hosts": v.ssh_hosts,
"ssh_gate": [
{
"host": u.bottle_host_alias,
"upstream": f"{u.upstream_host}:{u.upstream_port}",
"listen_port": u.listen_port,
}
for u in self.gate_plan.upstreams
],
"git_remotes": v.git_names,
"git_gate": [
{
-17
View File
@@ -25,7 +25,6 @@ from .bottle_plan import DockerBottlePlan
from .git_gate import DockerGitGate
from .pipelock import DockerPipelockProxy, pipelock_proxy_url, pipelock_tls_init
from .provision.ca import AGENT_CA_BUNDLE, AGENT_CA_PATH
from .ssh_gate import DockerSSHGate
# Where the repo root lives, for `docker build` context. Computed once.
@@ -37,7 +36,6 @@ def launch(
plan: DockerBottlePlan,
*,
proxy: DockerPipelockProxy,
gate: DockerSSHGate,
git_gate: DockerGitGate,
provision: Callable[[DockerBottlePlan, str], str | None],
) -> Generator[DockerBottle, None, None]:
@@ -89,21 +87,6 @@ def launch(
pipelock_name = proxy.start(plan.proxy_plan)
stack.callback(proxy.stop, pipelock_name)
# SSH egress gate (PRD 0007). One sidecar per agent, only
# brought up when the bottle has ssh entries. Lives on the
# same internal + egress networks pipelock straddles; the
# agent dials it by container name (DNS works on --internal,
# confirmed by the PRD 0007 spike).
if plan.gate_plan.upstreams:
gate_plan = dataclasses.replace(
plan.gate_plan,
internal_network=internal_network,
egress_network=egress_network,
)
plan = dataclasses.replace(plan, gate_plan=gate_plan)
gate_name = gate.start(plan.gate_plan)
stack.callback(gate.stop, gate_name)
# Git gate (PRD 0008). One sidecar per agent, only brought up
# when the bottle has git entries. Same internal + egress
# network attachment as the other sidecars; agent dials it as
+2 -6
View File
@@ -21,7 +21,6 @@ from . import util as docker_mod
from .bottle_plan import DockerBottlePlan
from .git_gate import DockerGitGate
from .pipelock import DockerPipelockProxy
from .ssh_gate import DockerSSHGate
def resolve_plan(
@@ -29,12 +28,11 @@ def resolve_plan(
*,
stage_dir: Path,
proxy: DockerPipelockProxy,
gate: DockerSSHGate,
git_gate: DockerGitGate,
) -> DockerBottlePlan:
"""Resolve Docker-specific names and write scratch files. Trusts
that the agent and its skills/SSH keys are present — validation
already ran in the base class."""
that the agent and its skills/git-gate keys are present —
validation already ran in the base class."""
docker_mod.require_docker()
manifest = spec.manifest
@@ -82,7 +80,6 @@ def resolve_plan(
prompt_file.chmod(0o600)
proxy_plan = proxy.prepare(bottle, slug, stage_dir)
gate_plan = gate.prepare(bottle, slug, stage_dir)
git_gate_plan = git_gate.prepare(bottle, slug, stage_dir)
resolved = resolve_env(manifest, spec.agent_name)
# Everything that should reach the bottle by-name (so its value
@@ -111,7 +108,6 @@ def resolve_plan(
forwarded_env=forwarded_env,
prompt_file=prompt_file,
proxy_plan=proxy_plan,
gate_plan=gate_plan,
git_gate_plan=git_gate_plan,
allowlist_summary=allowlist_summary,
use_runsc=use_runsc,
@@ -1,199 +0,0 @@
"""Set up SSH inside a running Docker bottle.
This is the most involved provisioner. The end state in the container:
- ~/.ssh/config + ~/.ssh/known_hosts owned by node, mode 600
- ssh-agent running as root with each key loaded; agent socket at
/run/claude-bottle-agent.sock
- socat forwarder (also root) bridging the agent socket to
/run/claude-bottle-agent-public.sock (mode 666) so node can talk
to the agent despite ssh-agent's SO_PEERCRED UID match
- on-disk key files deleted after `ssh-add`; the bytes only live in
the agent process's memory thereafter
See the `provision_ssh` docstring for the full isolation rationale."""
from __future__ import annotations
import os
import subprocess
from ....log import die, info
from ....util import expand_tilde
from .. import util as docker_mod
from ..bottle_plan import DockerBottlePlan
from ..ssh_gate import ssh_gate_host
def provision_ssh(plan: DockerBottlePlan, target: str) -> None:
    """Set up SSH in the container so node can authenticate using
    each entry's key without the key file being readable by node.
    No-op when the bottle has no SSH entries.

    Args:
        plan: Resolved bottle plan; supplies the manifest, the slug,
            the staging directory and the ssh-gate upstream table.
        target: Name of the running container to provision.

    Raises:
        subprocess.CalledProcessError: if a `docker cp` or the
            in-container setup script exits non-zero.

    Isolation strategy:
      - Keys live at /root/.claude-bottle-keys/ (mode 700,
        root-owned). /root is mode 700 in node:22-slim, so node
        (uid 1000) can't even traverse in.
      - ssh-agent runs as root, listening on
        /run/claude-bottle-agent.sock. Each key is loaded with
        ssh-add, then deleted; the bytes now live only in the
        agent process's memory.
      - ssh-agent's SO_PEERCRED-based UID match rejects every
        connection whose peer euid is neither 0 nor the agent's.
        To bridge that, a root-owned socat forwarder listens on
        /run/claude-bottle-agent-public.sock (mode 666) and
        proxies bytes to the real agent socket.
      - node can't ptrace root-owned agent or socat, so
        /proc/<pid>/mem is off-limits and key bytes never leave
        root-owned memory.
      - ~/.ssh/config in node's home points each Host at the
        public socket via IdentityAgent.

    Why an in-container agent (not bind-mounted from host):
      Docker Desktop on macOS does not forward Unix-domain socket
      connect() across the VM boundary — connect() returns
      ENOTSUP. Running ssh-agent inside the container sidesteps
      that entirely.

    Limitation: keys must be passphrase-less. ssh-add prompts on
    /dev/tty for passphrases, but our docker exec has no TTY."""
    # Function-scope import: only needed to quote staged key paths
    # for the in-container shell script below.
    import shlex

    bottle = plan.spec.manifest.bottle_for(plan.spec.agent_name)
    if not bottle.ssh:
        return

    container = target
    gate_target = ssh_gate_host(plan.slug)
    container_home = os.environ.get("CLAUDE_BOTTLE_CONTAINER_HOME", "/home/node")
    container_ssh = f"{container_home}/.ssh"
    agent_socket = "/run/claude-bottle-agent.sock"
    public_socket = "/run/claude-bottle-agent-public.sock"
    keys_dir = "/root/.claude-bottle-keys"

    # Per-entry listen ports come off the gate plan (PRD 0007).
    # Indexed by the bottle.ssh entry's Host alias so each ssh_config
    # block knows which port its forwarder lives on.
    upstreams_by_alias = {u.bottle_host_alias: u for u in plan.gate_plan.upstreams}
    if set(upstreams_by_alias) != {e.Host for e in bottle.ssh}:
        die(
            "ssh-gate upstream table is out of sync with bottle.ssh; "
            "this is an internal bug"
        )

    # ~/.ssh for node, then /root/.claude-bottle-keys for root; both
    # mode 700.
    for directory, owner in ((container_ssh, "node:node"), (keys_dir, "root:root")):
        docker_mod.docker_exec_root(container, ["mkdir", "-p", directory])
        docker_mod.docker_exec_root(container, ["chown", owner, directory])
        docker_mod.docker_exec_root(container, ["chmod", "700", directory])

    # Create the staging files empty and lock them down *before* any
    # content is written, so the content never sits at default perms.
    config_file = plan.stage_dir / "ssh_config"
    known_hosts_file = plan.stage_dir / "ssh_known_hosts"
    for staged in (config_file, known_hosts_file):
        staged.write_text("")
        staged.chmod(0o600)

    container_key_paths: list[str] = []
    config_blocks: list[str] = []
    known_hosts_lines: list[str] = []
    for index, entry in enumerate(bottle.ssh):
        name = entry.Host
        key = expand_tilde(entry.IdentityFile)
        hostname = entry.Hostname
        listen_port = upstreams_by_alias[name].listen_port

        # Prefix the staged filename with the entry index: two entries
        # whose IdentityFile share a basename (e.g. a/id_ed25519 and
        # b/id_ed25519) would otherwise overwrite each other here and
        # the second ssh-add would hit an already-deleted path.
        container_key_path = f"{keys_dir}/{index}-{os.path.basename(key)}"

        info(f"copying ssh key for '{name}' -> {container} (root-only staging)")
        subprocess.run(
            ["docker", "cp", key, f"{container}:{container_key_path}"],
            stdout=subprocess.DEVNULL,
            check=True,
        )
        docker_mod.docker_exec_root(container, ["chown", "root:root", container_key_path])
        docker_mod.docker_exec_root(container, ["chmod", "600", container_key_path])
        container_key_paths.append(container_key_path)

        # Each Host block points at the gate container + its
        # per-entry listen port. HostKeyAlias makes ssh validate
        # the host key against `hostname` (the real upstream
        # name) instead of the gate container; CheckHostIP=no
        # skips the resolved-IP lookup, which would also point at
        # the gate.
        config_blocks.append(
            f"Host {name}\n"
            f"  HostName {gate_target}\n"
            f"  User {entry.User}\n"
            f"  Port {listen_port}\n"
            f"  IdentityAgent {public_socket}\n"
            f"  HostKeyAlias {hostname}\n"
            f"  CheckHostIP no\n"
            f"\n"
        )

        if entry.KnownHostKey:
            # HostKeyAlias makes ssh look up known_hosts under
            # `hostname` (the upstream's real name / IP literal),
            # not the gate container. One unambiguous entry per
            # ssh entry.
            known_hosts_lines.append(f"{hostname} {entry.KnownHostKey}\n")

    # Single write per staging file; mode 600 set above is retained.
    config_file.write_text("".join(config_blocks))
    known_hosts_file.write_text("".join(known_hosts_lines))

    # Boot the agent, load each key, delete the key files, then
    # start the root-owned socat forwarder. One docker exec so the
    # whole sequence is atomic.
    info(f"starting in-container ssh-agent at {agent_socket} (forwarded via {public_socket})")
    setup_lines = [
        "set -eu",
        f"ssh-agent -a {agent_socket} >/dev/null",
    ]
    for kp in container_key_paths:
        # The basename comes from a user-supplied path; quote it so
        # spaces or shell metacharacters can't break the script.
        quoted_kp = shlex.quote(kp)
        setup_lines.append(f"SSH_AUTH_SOCK={agent_socket} ssh-add {quoted_kp}")
        setup_lines.append(f"rm -f {quoted_kp}")
    setup_lines.append(f"rmdir {keys_dir} 2>/dev/null || true")
    # Forwarder: socat (uid 0) connects to the agent on node's behalf.
    setup_lines.append(
        f"nohup socat UNIX-LISTEN:{public_socket},fork,reuseaddr,mode=666 "
        f"UNIX-CONNECT:{agent_socket} </dev/null >/dev/null 2>&1 &"
    )
    # Wait briefly for the forwarder to bind.
    setup_lines.extend([
        "i=0",
        "while [ $i -lt 20 ]; do",
        f"  [ -S {public_socket} ] && break",
        "  i=$((i + 1))",
        "  sleep 0.1",
        "done",
        f"[ -S {public_socket} ] || {{ echo 'claude-bottle: socat forwarder failed to bind {public_socket}' >&2; exit 1; }}",
    ])
    setup_script = "\n".join(setup_lines) + "\n"
    subprocess.run(
        ["docker", "exec", "-u", "0", container, "sh", "-c", setup_script],
        check=True,
    )

    info(f"writing {container_ssh}/config")
    subprocess.run(
        ["docker", "cp", str(config_file), f"{container}:{container_ssh}/config"],
        stdout=subprocess.DEVNULL,
        check=True,
    )
    docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/config"])
    docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/config"])

    # Only ship known_hosts when at least one entry pinned a host key.
    if known_hosts_lines:
        info(f"writing {container_ssh}/known_hosts")
        subprocess.run(
            ["docker", "cp", str(known_hosts_file), f"{container}:{container_ssh}/known_hosts"],
            stdout=subprocess.DEVNULL,
            check=True,
        )
        docker_mod.docker_exec_root(container, ["chown", "node:node", f"{container_ssh}/known_hosts"])
        docker_mod.docker_exec_root(container, ["chmod", "600", f"{container_ssh}/known_hosts"])
-159
View File
@@ -1,159 +0,0 @@
"""DockerSSHGate — the Docker-specific lifecycle for the per-agent
SSH egress gate sidecar (PRD 0007). Inherits the platform-agnostic
prepare step (upstream allocation + entrypoint render) from
`SSHGate`."""
from __future__ import annotations
import os
import subprocess
from ...log import die, info, warn
from ...ssh_gate import SSHGate, SSHGatePlan
# alpine/socat pinned by digest. The image is `alpine` + `socat`
# pre-installed; PRD 0007 requires the gate image to be
# self-sufficient at boot (no apk pulls) because the agent-facing
# leg sits on the `--internal` network.
SSH_GATE_IMAGE = os.environ.get(
"CLAUDE_BOTTLE_SSH_GATE_IMAGE",
"alpine/socat@sha256:a26f4bcee25ad4a4096ce91e596c0a2fffcbb51f7fd198dd87a5c86eae66f0e1",
)
# In-container path the entrypoint script lands at after `docker cp`.
# Root path keeps the cp simple — no intermediate directories to
# create.
SSH_GATE_ENTRYPOINT_IN_CONTAINER = "/ssh-gate-entrypoint.sh"
def ssh_gate_container_name(slug: str) -> str:
    """Return the Docker container name of the ssh-gate sidecar for
    the agent identified by `slug`."""
    return "claude-bottle-ssh-gate-{}".format(slug)
def ssh_gate_host(slug: str) -> str:
    """Return the hostname the agent's ssh client should dial.

    This is simply the gate's container name: Docker's embedded DNS
    resolves it on the `--internal` network (verified by the PRD 0007
    DNS spike)."""
    container_name = ssh_gate_container_name(slug)
    return container_name
def _docker_run(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run `docker <args>` with stdout/stderr captured as text. Never
    raises on a non-zero exit; callers inspect `.returncode`."""
    return subprocess.run(
        ["docker", *args],
        capture_output=True,
        text=True,
        check=False,
    )


def _with_stderr(message: str, result: subprocess.CompletedProcess[str]) -> str:
    """Append docker's stderr to `message` when there is any, so the
    failure is diagnosable without re-running the command by hand."""
    detail = (result.stderr or "").strip()
    return f"{message}: {detail}" if detail else message


class DockerSSHGate(SSHGate):
    """Brings the SSH gate sidecar up and down via Docker."""

    def start(self, plan: SSHGatePlan) -> str:
        """Boot the gate sidecar:

          1. `docker create` on the internal network with the
             canonical name, `--entrypoint /bin/sh`, and the
             in-container entrypoint path as the CMD.
          2. `docker cp` the entrypoint script in.
          3. Attach to the per-agent egress network so socat can dial
             upstream.
          4. `docker start`.

        Any failure after step 1 force-removes the half-built
        container before reporting via `die`, and every failure
        message carries docker's stderr when it produced any.

        Returns the container name (the target passed to `.stop`)."""
        if not plan.upstreams:
            die("DockerSSHGate.start called with no upstreams; caller should skip")
        if not plan.internal_network or not plan.egress_network:
            die(
                "DockerSSHGate.start: internal_network / egress_network must be "
                "populated on the plan before start"
            )
        if not plan.entrypoint_script.is_file():
            die(
                f"ssh-gate entrypoint script missing at {plan.entrypoint_script}; "
                f"SSHGate.prepare must run first"
            )

        name = ssh_gate_container_name(plan.slug)
        info(f"starting ssh-gate sidecar {name} on network {plan.internal_network}")

        created = _docker_run([
            "create",
            "--name", name,
            "--network", plan.internal_network,
            "--entrypoint", "/bin/sh",
            SSH_GATE_IMAGE,
            SSH_GATE_ENTRYPOINT_IN_CONTAINER,
        ])
        if created.returncode != 0:
            # Nothing to clean up: the container was never created.
            die(_with_stderr(f"failed to create ssh-gate sidecar {name}", created))

        copied = _docker_run([
            "cp",
            str(plan.entrypoint_script),
            f"{name}:{SSH_GATE_ENTRYPOINT_IN_CONTAINER}",
        ])
        if copied.returncode != 0:
            _docker_run(["rm", "-f", name])
            die(
                f"failed to copy ssh-gate entrypoint into {name}: "
                f"{copied.stderr.strip()}"
            )

        attached = _docker_run(["network", "connect", plan.egress_network, name])
        if attached.returncode != 0:
            _docker_run(["rm", "-f", name])
            die(
                _with_stderr(
                    f"failed to attach ssh-gate sidecar {name} to egress network "
                    f"{plan.egress_network}",
                    attached,
                )
            )

        started = _docker_run(["start", name])
        if started.returncode != 0:
            _docker_run(["rm", "-f", name])
            die(_with_stderr(f"failed to start ssh-gate sidecar {name}", started))

        return name

    def stop(self, target: str) -> None:
        """Idempotent: missing container is success. `target` is the
        container name returned by `.start`. A failed removal is only
        warned about, with the manual cleanup command."""
        if _docker_run(["inspect", target]).returncode != 0:
            # Nothing by that name exists; nothing to remove.
            return
        if _docker_run(["rm", "-f", target]).returncode != 0:
            warn(
                f"failed to remove ssh-gate sidecar {target}; "
                f"clean up with 'docker rm -f {target}'"
            )