fix(docker): surface sidecar docker errors + probe for name orphans
test / unit (pull_request) Successful in 19s
test / integration (pull_request) Successful in 26s

Two failure-clarity paper cuts from the cred-proxy debugging:

1. Every docker create / start / network-connect call on the three
   sidecars (pipelock, git-gate, cred-proxy) was piping stderr to
   DEVNULL. A stuck orphan from a previous run produced "failed to
   create pipelock sidecar claude-bottle-pipelock-demo" with no
   pointer at the real cause ("Conflict. The container name ... is
   already in use ..."). Switch each call to capture_output=True and
   include the stripped stderr in the die() message.

2. The agent container had a container_exists() probe in resolve_plan
   that fails fast with a hint, but the sidecars (whose names are
   deterministic from the slug) didn't. So an orphan caused launch()
   to bail deep inside docker create. Add a probe in resolve_plan for
   each sidecar this launch will actually try to create: pipelock
   always; git-gate when bottle.git is non-empty; cred-proxy when
   bottle.cred_proxy.routes is non-empty. Die with a "./cli.py
   cleanup" pointer.

Smoke-tested with an orphaned pipelock-<slug> container — the new
probe fires with the expected hint before any sidecar build/start
work begins.
This commit is contained in:
2026-05-24 12:33:54 -04:00
parent 2990c3c903
commit 0eb482daf0
4 changed files with 104 additions and 61 deletions
+21 -21
View File
@@ -161,14 +161,14 @@ class DockerCredProxy(CredProxy):
child_env: dict[str, str] = {**os.environ, **token_values}
if subprocess.run(
create_args,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
env=child_env,
check=False,
).returncode != 0:
die(f"failed to create cred-proxy sidecar {name}")
create_result = subprocess.run(
create_args, capture_output=True, text=True, env=child_env, check=False,
)
if create_result.returncode != 0:
die(
f"failed to create cred-proxy sidecar {name}: "
f"{create_result.stderr.strip()}"
)
cps: list[tuple[str, str, str]] = [
(str(plan.routes_path), CRED_PROXY_ROUTES_IN_CONTAINER, "routes.json"),
@@ -202,12 +202,11 @@ class DockerCredProxy(CredProxy):
f"{cp_result.stderr.strip()}"
)
if subprocess.run(
connect_result = subprocess.run(
["docker", "network", "connect", plan.egress_network, name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode != 0:
capture_output=True, text=True, check=False,
)
if connect_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
@@ -216,22 +215,23 @@ class DockerCredProxy(CredProxy):
)
die(
f"failed to attach cred-proxy sidecar {name} to egress network "
f"{plan.egress_network}"
f"{plan.egress_network}: {connect_result.stderr.strip()}"
)
if subprocess.run(
["docker", "start", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode != 0:
start_result = subprocess.run(
["docker", "start", name], capture_output=True, text=True, check=False,
)
if start_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
die(f"failed to start cred-proxy sidecar {name}")
die(
f"failed to start cred-proxy sidecar {name}: "
f"{start_result.stderr.strip()}"
)
return name
+21 -20
View File
@@ -110,13 +110,14 @@ class DockerGitGate(GitGate):
for host, ip in git_gate_aggregate_extra_hosts(plan.upstreams).items():
create_args.extend(["--add-host", f"{host}:{ip}"])
create_args.append(GIT_GATE_IMAGE)
if subprocess.run(
create_args,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode != 0:
die(f"failed to create git-gate sidecar {name}")
create_result = subprocess.run(
create_args, capture_output=True, text=True, check=False,
)
if create_result.returncode != 0:
die(
f"failed to create git-gate sidecar {name}: "
f"{create_result.stderr.strip()}"
)
# Order matters: entrypoint + hook first so they're present
# when docker start fires. Per-upstream creds afterwards.
@@ -166,12 +167,11 @@ class DockerGitGate(GitGate):
f"{cp_result.stderr.strip()}"
)
if subprocess.run(
connect_result = subprocess.run(
["docker", "network", "connect", plan.egress_network, name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode != 0:
capture_output=True, text=True, check=False,
)
if connect_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
@@ -180,22 +180,23 @@ class DockerGitGate(GitGate):
)
die(
f"failed to attach git-gate sidecar {name} to egress network "
f"{plan.egress_network}"
f"{plan.egress_network}: {connect_result.stderr.strip()}"
)
if subprocess.run(
["docker", "start", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode != 0:
start_result = subprocess.run(
["docker", "start", name], capture_output=True, text=True, check=False,
)
if start_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
die(f"failed to start git-gate sidecar {name}")
die(
f"failed to start git-gate sidecar {name}: "
f"{start_result.stderr.strip()}"
)
return name
+32 -17
View File
@@ -110,8 +110,14 @@ class DockerPipelockProxy(PipelockProxy):
"run", "--config", "/etc/pipelock.yaml",
"--listen", f"0.0.0.0:{PIPELOCK_PORT}",
]
if subprocess.run(create_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode != 0:
die(f"failed to create pipelock sidecar {name}")
create_result = subprocess.run(
create_args, capture_output=True, text=True, check=False,
)
if create_result.returncode != 0:
die(
f"failed to create pipelock sidecar {name}: "
f"{create_result.stderr.strip()}"
)
for src, dst, label in (
(plan.yaml_path, "/etc/pipelock.yaml", "yaml"),
@@ -131,23 +137,32 @@ class DockerPipelockProxy(PipelockProxy):
)
die(f"failed to copy pipelock {label} into {name}: {cp_result.stderr.strip()}")
if subprocess.run(
connect_result = subprocess.run(
["docker", "network", "connect", plan.egress_network, name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode != 0:
subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
die(f"failed to attach pipelock sidecar {name} to egress network {plan.egress_network}")
capture_output=True, text=True, check=False,
)
if connect_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
die(
f"failed to attach pipelock sidecar {name} to egress network "
f"{plan.egress_network}: {connect_result.stderr.strip()}"
)
if subprocess.run(
["docker", "start", name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
).returncode != 0:
subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
die(f"failed to start pipelock sidecar {name}")
start_result = subprocess.run(
["docker", "start", name], capture_output=True, text=True, check=False,
)
if start_result.returncode != 0:
subprocess.run(
["docker", "rm", "-f", name],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
)
die(
f"failed to start pipelock sidecar {name}: "
f"{start_result.stderr.strip()}"
)
return name
+30 -3
View File
@@ -19,9 +19,13 @@ from ...log import die
from .. import BottleSpec
from . import util as docker_mod
from .bottle_plan import DockerBottlePlan
from .cred_proxy import DockerCredProxy, cred_proxy_url
from .git_gate import DockerGitGate
from .pipelock import DockerPipelockProxy
from .cred_proxy import (
DockerCredProxy,
cred_proxy_container_name,
cred_proxy_url,
)
from .git_gate import DockerGitGate, git_gate_container_name
from .pipelock import DockerPipelockProxy, pipelock_container_name
def resolve_plan(
@@ -76,6 +80,29 @@ def resolve_plan(
f"clean up old containers with 'docker rm -f <name>'"
)
# Probe sidecar container names for orphans from a previous run.
# Sidecar names are deterministic from the slug; an orphan would
# surface as a docker-create conflict deep inside launch() with no
# actionable hint. Fail fast here with a cleanup pointer instead.
# Only probe sidecars this launch will actually try to create:
# pipelock always; git-gate when bottle.git is non-empty; cred-proxy
# when bottle.cred_proxy.routes is non-empty.
sidecar_probes: list[tuple[str, str]] = [
("pipelock", pipelock_container_name(slug)),
]
if bottle.git:
sidecar_probes.append(("git-gate", git_gate_container_name(slug)))
if bottle.cred_proxy.routes:
sidecar_probes.append(("cred-proxy", cred_proxy_container_name(slug)))
for label, sidecar_name in sidecar_probes:
if docker_mod.container_exists(sidecar_name):
die(
f"{label} sidecar container '{sidecar_name}' already exists. "
f"This is an orphan from a previous run; clean it up with "
f"'./cli.py cleanup' (or 'docker rm -f {sidecar_name}') and "
f"retry."
)
env_file = stage_dir / "agent.env"
prompt_file = stage_dir / "prompt.txt"
prompt_file.write_text("")