fix(docker): surface sidecar docker errors + probe for name orphans
Fix two failure-clarity paper cuts found during the cred-proxy debugging:
1. Every docker create / start / network-connect call on the three
sidecars (pipelock, git-gate, cred-proxy) was piping stderr to
DEVNULL. A stuck orphan from a previous run produced "failed to
create pipelock sidecar claude-bottle-pipelock-demo" with no
pointer to the real cause ("Conflict. The container name ... is
already in use ..."). Switch each call to capture_output=True and
include the stripped stderr in the die() message.
2. The agent container already has a container_exists() probe in resolve_plan
that fails fast with a hint, but the sidecars (whose names are
deterministic from the slug) had none, so an orphan caused launch()
to bail deep inside docker create. Add a probe in resolve_plan for
each sidecar this launch will actually try to create: pipelock
always; git-gate when bottle.git is non-empty; cred-proxy when
bottle.cred_proxy.routes is non-empty. Die with a "./cli.py
cleanup" pointer.
Smoke-tested with an orphaned pipelock-<slug> container — the new
probe fires with the expected hint before any sidecar build/start
work begins.
This commit is contained in:
@@ -161,14 +161,14 @@ class DockerCredProxy(CredProxy):
|
|||||||
|
|
||||||
child_env: dict[str, str] = {**os.environ, **token_values}
|
child_env: dict[str, str] = {**os.environ, **token_values}
|
||||||
|
|
||||||
if subprocess.run(
|
create_result = subprocess.run(
|
||||||
create_args,
|
create_args, capture_output=True, text=True, env=child_env, check=False,
|
||||||
stdout=subprocess.DEVNULL,
|
)
|
||||||
stderr=subprocess.DEVNULL,
|
if create_result.returncode != 0:
|
||||||
env=child_env,
|
die(
|
||||||
check=False,
|
f"failed to create cred-proxy sidecar {name}: "
|
||||||
).returncode != 0:
|
f"{create_result.stderr.strip()}"
|
||||||
die(f"failed to create cred-proxy sidecar {name}")
|
)
|
||||||
|
|
||||||
cps: list[tuple[str, str, str]] = [
|
cps: list[tuple[str, str, str]] = [
|
||||||
(str(plan.routes_path), CRED_PROXY_ROUTES_IN_CONTAINER, "routes.json"),
|
(str(plan.routes_path), CRED_PROXY_ROUTES_IN_CONTAINER, "routes.json"),
|
||||||
@@ -202,12 +202,11 @@ class DockerCredProxy(CredProxy):
|
|||||||
f"{cp_result.stderr.strip()}"
|
f"{cp_result.stderr.strip()}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if subprocess.run(
|
connect_result = subprocess.run(
|
||||||
["docker", "network", "connect", plan.egress_network, name],
|
["docker", "network", "connect", plan.egress_network, name],
|
||||||
stdout=subprocess.DEVNULL,
|
capture_output=True, text=True, check=False,
|
||||||
stderr=subprocess.DEVNULL,
|
)
|
||||||
check=False,
|
if connect_result.returncode != 0:
|
||||||
).returncode != 0:
|
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
["docker", "rm", "-f", name],
|
["docker", "rm", "-f", name],
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
@@ -216,22 +215,23 @@ class DockerCredProxy(CredProxy):
|
|||||||
)
|
)
|
||||||
die(
|
die(
|
||||||
f"failed to attach cred-proxy sidecar {name} to egress network "
|
f"failed to attach cred-proxy sidecar {name} to egress network "
|
||||||
f"{plan.egress_network}"
|
f"{plan.egress_network}: {connect_result.stderr.strip()}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if subprocess.run(
|
start_result = subprocess.run(
|
||||||
["docker", "start", name],
|
["docker", "start", name], capture_output=True, text=True, check=False,
|
||||||
stdout=subprocess.DEVNULL,
|
)
|
||||||
stderr=subprocess.DEVNULL,
|
if start_result.returncode != 0:
|
||||||
check=False,
|
|
||||||
).returncode != 0:
|
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
["docker", "rm", "-f", name],
|
["docker", "rm", "-f", name],
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
stderr=subprocess.DEVNULL,
|
stderr=subprocess.DEVNULL,
|
||||||
check=False,
|
check=False,
|
||||||
)
|
)
|
||||||
die(f"failed to start cred-proxy sidecar {name}")
|
die(
|
||||||
|
f"failed to start cred-proxy sidecar {name}: "
|
||||||
|
f"{start_result.stderr.strip()}"
|
||||||
|
)
|
||||||
|
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|||||||
@@ -110,13 +110,14 @@ class DockerGitGate(GitGate):
|
|||||||
for host, ip in git_gate_aggregate_extra_hosts(plan.upstreams).items():
|
for host, ip in git_gate_aggregate_extra_hosts(plan.upstreams).items():
|
||||||
create_args.extend(["--add-host", f"{host}:{ip}"])
|
create_args.extend(["--add-host", f"{host}:{ip}"])
|
||||||
create_args.append(GIT_GATE_IMAGE)
|
create_args.append(GIT_GATE_IMAGE)
|
||||||
if subprocess.run(
|
create_result = subprocess.run(
|
||||||
create_args,
|
create_args, capture_output=True, text=True, check=False,
|
||||||
stdout=subprocess.DEVNULL,
|
)
|
||||||
stderr=subprocess.DEVNULL,
|
if create_result.returncode != 0:
|
||||||
check=False,
|
die(
|
||||||
).returncode != 0:
|
f"failed to create git-gate sidecar {name}: "
|
||||||
die(f"failed to create git-gate sidecar {name}")
|
f"{create_result.stderr.strip()}"
|
||||||
|
)
|
||||||
|
|
||||||
# Order matters: entrypoint + hook first so they're present
|
# Order matters: entrypoint + hook first so they're present
|
||||||
# when docker start fires. Per-upstream creds afterwards.
|
# when docker start fires. Per-upstream creds afterwards.
|
||||||
@@ -166,12 +167,11 @@ class DockerGitGate(GitGate):
|
|||||||
f"{cp_result.stderr.strip()}"
|
f"{cp_result.stderr.strip()}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if subprocess.run(
|
connect_result = subprocess.run(
|
||||||
["docker", "network", "connect", plan.egress_network, name],
|
["docker", "network", "connect", plan.egress_network, name],
|
||||||
stdout=subprocess.DEVNULL,
|
capture_output=True, text=True, check=False,
|
||||||
stderr=subprocess.DEVNULL,
|
)
|
||||||
check=False,
|
if connect_result.returncode != 0:
|
||||||
).returncode != 0:
|
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
["docker", "rm", "-f", name],
|
["docker", "rm", "-f", name],
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
@@ -180,22 +180,23 @@ class DockerGitGate(GitGate):
|
|||||||
)
|
)
|
||||||
die(
|
die(
|
||||||
f"failed to attach git-gate sidecar {name} to egress network "
|
f"failed to attach git-gate sidecar {name} to egress network "
|
||||||
f"{plan.egress_network}"
|
f"{plan.egress_network}: {connect_result.stderr.strip()}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if subprocess.run(
|
start_result = subprocess.run(
|
||||||
["docker", "start", name],
|
["docker", "start", name], capture_output=True, text=True, check=False,
|
||||||
stdout=subprocess.DEVNULL,
|
)
|
||||||
stderr=subprocess.DEVNULL,
|
if start_result.returncode != 0:
|
||||||
check=False,
|
|
||||||
).returncode != 0:
|
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
["docker", "rm", "-f", name],
|
["docker", "rm", "-f", name],
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
stderr=subprocess.DEVNULL,
|
stderr=subprocess.DEVNULL,
|
||||||
check=False,
|
check=False,
|
||||||
)
|
)
|
||||||
die(f"failed to start git-gate sidecar {name}")
|
die(
|
||||||
|
f"failed to start git-gate sidecar {name}: "
|
||||||
|
f"{start_result.stderr.strip()}"
|
||||||
|
)
|
||||||
|
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|||||||
@@ -110,8 +110,14 @@ class DockerPipelockProxy(PipelockProxy):
|
|||||||
"run", "--config", "/etc/pipelock.yaml",
|
"run", "--config", "/etc/pipelock.yaml",
|
||||||
"--listen", f"0.0.0.0:{PIPELOCK_PORT}",
|
"--listen", f"0.0.0.0:{PIPELOCK_PORT}",
|
||||||
]
|
]
|
||||||
if subprocess.run(create_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False).returncode != 0:
|
create_result = subprocess.run(
|
||||||
die(f"failed to create pipelock sidecar {name}")
|
create_args, capture_output=True, text=True, check=False,
|
||||||
|
)
|
||||||
|
if create_result.returncode != 0:
|
||||||
|
die(
|
||||||
|
f"failed to create pipelock sidecar {name}: "
|
||||||
|
f"{create_result.stderr.strip()}"
|
||||||
|
)
|
||||||
|
|
||||||
for src, dst, label in (
|
for src, dst, label in (
|
||||||
(plan.yaml_path, "/etc/pipelock.yaml", "yaml"),
|
(plan.yaml_path, "/etc/pipelock.yaml", "yaml"),
|
||||||
@@ -131,23 +137,32 @@ class DockerPipelockProxy(PipelockProxy):
|
|||||||
)
|
)
|
||||||
die(f"failed to copy pipelock {label} into {name}: {cp_result.stderr.strip()}")
|
die(f"failed to copy pipelock {label} into {name}: {cp_result.stderr.strip()}")
|
||||||
|
|
||||||
if subprocess.run(
|
connect_result = subprocess.run(
|
||||||
["docker", "network", "connect", plan.egress_network, name],
|
["docker", "network", "connect", plan.egress_network, name],
|
||||||
stdout=subprocess.DEVNULL,
|
capture_output=True, text=True, check=False,
|
||||||
stderr=subprocess.DEVNULL,
|
)
|
||||||
check=False,
|
if connect_result.returncode != 0:
|
||||||
).returncode != 0:
|
subprocess.run(
|
||||||
subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
|
["docker", "rm", "-f", name],
|
||||||
die(f"failed to attach pipelock sidecar {name} to egress network {plan.egress_network}")
|
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||||
|
)
|
||||||
|
die(
|
||||||
|
f"failed to attach pipelock sidecar {name} to egress network "
|
||||||
|
f"{plan.egress_network}: {connect_result.stderr.strip()}"
|
||||||
|
)
|
||||||
|
|
||||||
if subprocess.run(
|
start_result = subprocess.run(
|
||||||
["docker", "start", name],
|
["docker", "start", name], capture_output=True, text=True, check=False,
|
||||||
stdout=subprocess.DEVNULL,
|
)
|
||||||
stderr=subprocess.DEVNULL,
|
if start_result.returncode != 0:
|
||||||
check=False,
|
subprocess.run(
|
||||||
).returncode != 0:
|
["docker", "rm", "-f", name],
|
||||||
subprocess.run(["docker", "rm", "-f", name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
|
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False,
|
||||||
die(f"failed to start pipelock sidecar {name}")
|
)
|
||||||
|
die(
|
||||||
|
f"failed to start pipelock sidecar {name}: "
|
||||||
|
f"{start_result.stderr.strip()}"
|
||||||
|
)
|
||||||
|
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|||||||
@@ -19,9 +19,13 @@ from ...log import die
|
|||||||
from .. import BottleSpec
|
from .. import BottleSpec
|
||||||
from . import util as docker_mod
|
from . import util as docker_mod
|
||||||
from .bottle_plan import DockerBottlePlan
|
from .bottle_plan import DockerBottlePlan
|
||||||
from .cred_proxy import DockerCredProxy, cred_proxy_url
|
from .cred_proxy import (
|
||||||
from .git_gate import DockerGitGate
|
DockerCredProxy,
|
||||||
from .pipelock import DockerPipelockProxy
|
cred_proxy_container_name,
|
||||||
|
cred_proxy_url,
|
||||||
|
)
|
||||||
|
from .git_gate import DockerGitGate, git_gate_container_name
|
||||||
|
from .pipelock import DockerPipelockProxy, pipelock_container_name
|
||||||
|
|
||||||
|
|
||||||
def resolve_plan(
|
def resolve_plan(
|
||||||
@@ -76,6 +80,29 @@ def resolve_plan(
|
|||||||
f"clean up old containers with 'docker rm -f <name>'"
|
f"clean up old containers with 'docker rm -f <name>'"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Probe sidecar container names for orphans from a previous run.
|
||||||
|
# Sidecar names are deterministic from the slug; an orphan would
|
||||||
|
# surface as a docker-create conflict deep inside launch() with no
|
||||||
|
# actionable hint. Fail fast here with a cleanup pointer instead.
|
||||||
|
# Only probe sidecars this launch will actually try to create:
|
||||||
|
# pipelock always; git-gate when bottle.git is non-empty; cred-proxy
|
||||||
|
# when bottle.cred_proxy.routes is non-empty.
|
||||||
|
sidecar_probes: list[tuple[str, str]] = [
|
||||||
|
("pipelock", pipelock_container_name(slug)),
|
||||||
|
]
|
||||||
|
if bottle.git:
|
||||||
|
sidecar_probes.append(("git-gate", git_gate_container_name(slug)))
|
||||||
|
if bottle.cred_proxy.routes:
|
||||||
|
sidecar_probes.append(("cred-proxy", cred_proxy_container_name(slug)))
|
||||||
|
for label, sidecar_name in sidecar_probes:
|
||||||
|
if docker_mod.container_exists(sidecar_name):
|
||||||
|
die(
|
||||||
|
f"{label} sidecar container '{sidecar_name}' already exists. "
|
||||||
|
f"This is an orphan from a previous run; clean it up with "
|
||||||
|
f"'./cli.py cleanup' (or 'docker rm -f {sidecar_name}') and "
|
||||||
|
f"retry."
|
||||||
|
)
|
||||||
|
|
||||||
env_file = stage_dir / "agent.env"
|
env_file = stage_dir / "agent.env"
|
||||||
prompt_file = stage_dir / "prompt.txt"
|
prompt_file = stage_dir / "prompt.txt"
|
||||||
prompt_file.write_text("")
|
prompt_file.write_text("")
|
||||||
|
|||||||
Reference in New Issue
Block a user