fix(docker): surface sidecar docker errors + probe for name orphans
Two failure-clarity paper cuts from the cred-proxy debugging:
1. Every docker create / start / network-connect call on the three
sidecars (pipelock, git-gate, cred-proxy) was sending stderr to
DEVNULL. A stuck orphan from a previous run produced "failed to
create pipelock sidecar claude-bottle-pipelock-demo" with no
pointer to the real cause ("Conflict. The container name ... is
already in use ..."). Switch each call to capture_output=True and
append the stripped stderr to the die() message.
2. The agent container already has a container_exists() probe in
resolve_plan that fails fast with a hint, but the sidecars (whose
names are deterministic from the slug) had none, so an orphan caused
launch() to bail deep inside docker create. Add a probe in
resolve_plan for each sidecar this launch will actually try to
create: pipelock always; git-gate when bottle.git is non-empty;
cred-proxy when bottle.cred_proxy.routes is non-empty. On a hit, die
with a pointer to "./cli.py cleanup" (or "docker rm -f <name>").
Smoke-tested with an orphaned pipelock-<slug> container — the new
probe fires with the expected hint before any sidecar build/start
work begins.
This commit is contained in:
@@ -161,14 +161,14 @@ class DockerCredProxy(CredProxy):
|
||||
|
||||
child_env: dict[str, str] = {**os.environ, **token_values}
|
||||
|
||||
if subprocess.run(
|
||||
create_args,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
env=child_env,
|
||||
check=False,
|
||||
).returncode != 0:
|
||||
die(f"failed to create cred-proxy sidecar {name}")
|
||||
create_result = subprocess.run(
|
||||
create_args, capture_output=True, text=True, env=child_env, check=False,
|
||||
)
|
||||
if create_result.returncode != 0:
|
||||
die(
|
||||
f"failed to create cred-proxy sidecar {name}: "
|
||||
f"{create_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
cps: list[tuple[str, str, str]] = [
|
||||
(str(plan.routes_path), CRED_PROXY_ROUTES_IN_CONTAINER, "routes.json"),
|
||||
@@ -202,12 +202,11 @@ class DockerCredProxy(CredProxy):
|
||||
f"{cp_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
if subprocess.run(
|
||||
connect_result = subprocess.run(
|
||||
["docker", "network", "connect", plan.egress_network, name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode != 0:
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if connect_result.returncode != 0:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
@@ -216,22 +215,23 @@ class DockerCredProxy(CredProxy):
|
||||
)
|
||||
die(
|
||||
f"failed to attach cred-proxy sidecar {name} to egress network "
|
||||
f"{plan.egress_network}"
|
||||
f"{plan.egress_network}: {connect_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
if subprocess.run(
|
||||
["docker", "start", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode != 0:
|
||||
start_result = subprocess.run(
|
||||
["docker", "start", name], capture_output=True, text=True, check=False,
|
||||
)
|
||||
if start_result.returncode != 0:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
die(f"failed to start cred-proxy sidecar {name}")
|
||||
die(
|
||||
f"failed to start cred-proxy sidecar {name}: "
|
||||
f"{start_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
return name
|
||||
|
||||
|
||||
@@ -110,13 +110,14 @@ class DockerGitGate(GitGate):
|
||||
for host, ip in git_gate_aggregate_extra_hosts(plan.upstreams).items():
|
||||
create_args.extend(["--add-host", f"{host}:{ip}"])
|
||||
create_args.append(GIT_GATE_IMAGE)
|
||||
if subprocess.run(
|
||||
create_args,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode != 0:
|
||||
die(f"failed to create git-gate sidecar {name}")
|
||||
create_result = subprocess.run(
|
||||
create_args, capture_output=True, text=True, check=False,
|
||||
)
|
||||
if create_result.returncode != 0:
|
||||
die(
|
||||
f"failed to create git-gate sidecar {name}: "
|
||||
f"{create_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
# Order matters: entrypoint + hook first so they're present
|
||||
# when docker start fires. Per-upstream creds afterwards.
|
||||
@@ -166,12 +167,11 @@ class DockerGitGate(GitGate):
|
||||
f"{cp_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
if subprocess.run(
|
||||
connect_result = subprocess.run(
|
||||
["docker", "network", "connect", plan.egress_network, name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode != 0:
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if connect_result.returncode != 0:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
@@ -180,22 +180,23 @@ class DockerGitGate(GitGate):
|
||||
)
|
||||
die(
|
||||
f"failed to attach git-gate sidecar {name} to egress network "
|
||||
f"{plan.egress_network}"
|
||||
f"{plan.egress_network}: {connect_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
if subprocess.run(
|
||||
["docker", "start", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode != 0:
|
||||
start_result = subprocess.run(
|
||||
["docker", "start", name], capture_output=True, text=True, check=False,
|
||||
)
|
||||
if start_result.returncode != 0:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
)
|
||||
die(f"failed to start git-gate sidecar {name}")
|
||||
die(
|
||||
f"failed to start git-gate sidecar {name}: "
|
||||
f"{start_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
return name
|
||||
|
||||
|
||||
@@ -110,8 +110,14 @@ class DockerPipelockProxy(PipelockProxy):
|
||||
"run", "--config", "/etc/pipelock.yaml",
|
||||
"--listen", f"0.0.0.0:{PIPELOCK_PORT}",
|
||||
]
|
||||
if subprocess.run(create_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode != 0:
|
||||
die(f"failed to create pipelock sidecar {name}")
|
||||
create_result = subprocess.run(
|
||||
create_args, capture_output=True, text=True, check=False,
|
||||
)
|
||||
if create_result.returncode != 0:
|
||||
die(
|
||||
f"failed to create pipelock sidecar {name}: "
|
||||
f"{create_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
for src, dst, label in (
|
||||
(plan.yaml_path, "/etc/pipelock.yaml", "yaml"),
|
||||
@@ -131,23 +137,32 @@ class DockerPipelockProxy(PipelockProxy):
|
||||
)
|
||||
die(f"failed to copy pipelock {label} into {name}: {cp_result.stderr.strip()}")
|
||||
|
||||
if subprocess.run(
|
||||
connect_result = subprocess.run(
|
||||
["docker", "network", "connect", plan.egress_network, name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode != 0:
|
||||
subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
|
||||
die(f"failed to attach pipelock sidecar {name} to egress network {plan.egress_network}")
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
if connect_result.returncode != 0:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
die(
|
||||
f"failed to attach pipelock sidecar {name} to egress network "
|
||||
f"{plan.egress_network}: {connect_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
if subprocess.run(
|
||||
["docker", "start", name],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False,
|
||||
).returncode != 0:
|
||||
subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
|
||||
die(f"failed to start pipelock sidecar {name}")
|
||||
start_result = subprocess.run(
|
||||
["docker", "start", name], capture_output=True, text=True, check=False,
|
||||
)
|
||||
if start_result.returncode != 0:
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", name],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||
)
|
||||
die(
|
||||
f"failed to start pipelock sidecar {name}: "
|
||||
f"{start_result.stderr.strip()}"
|
||||
)
|
||||
|
||||
return name
|
||||
|
||||
|
||||
@@ -19,9 +19,13 @@ from ...log import die
|
||||
from .. import BottleSpec
|
||||
from . import util as docker_mod
|
||||
from .bottle_plan import DockerBottlePlan
|
||||
from .cred_proxy import DockerCredProxy, cred_proxy_url
|
||||
from .git_gate import DockerGitGate
|
||||
from .pipelock import DockerPipelockProxy
|
||||
from .cred_proxy import (
|
||||
DockerCredProxy,
|
||||
cred_proxy_container_name,
|
||||
cred_proxy_url,
|
||||
)
|
||||
from .git_gate import DockerGitGate, git_gate_container_name
|
||||
from .pipelock import DockerPipelockProxy, pipelock_container_name
|
||||
|
||||
|
||||
def resolve_plan(
|
||||
@@ -76,6 +80,29 @@ def resolve_plan(
|
||||
f"clean up old containers with 'docker rm -f <name>'"
|
||||
)
|
||||
|
||||
# Probe sidecar container names for orphans from a previous run.
|
||||
# Sidecar names are deterministic from the slug; an orphan would
|
||||
# surface as a docker-create conflict deep inside launch() with no
|
||||
# actionable hint. Fail fast here with a cleanup pointer instead.
|
||||
# Only probe sidecars this launch will actually try to create:
|
||||
# pipelock always; git-gate when bottle.git is non-empty; cred-proxy
|
||||
# when bottle.cred_proxy.routes is non-empty.
|
||||
sidecar_probes: list[tuple[str, str]] = [
|
||||
("pipelock", pipelock_container_name(slug)),
|
||||
]
|
||||
if bottle.git:
|
||||
sidecar_probes.append(("git-gate", git_gate_container_name(slug)))
|
||||
if bottle.cred_proxy.routes:
|
||||
sidecar_probes.append(("cred-proxy", cred_proxy_container_name(slug)))
|
||||
for label, sidecar_name in sidecar_probes:
|
||||
if docker_mod.container_exists(sidecar_name):
|
||||
die(
|
||||
f"{label} sidecar container '{sidecar_name}' already exists. "
|
||||
f"This is an orphan from a previous run; clean it up with "
|
||||
f"'./cli.py cleanup' (or 'docker rm -f {sidecar_name}') and "
|
||||
f"retry."
|
||||
)
|
||||
|
||||
env_file = stage_dir / "agent.env"
|
||||
prompt_file = stage_dir / "prompt.txt"
|
||||
prompt_file.write_text("")
|
||||
|
||||
Reference in New Issue
Block a user