feat(sidecars): bundle image + init supervisor (PRD 0024 chunk 1) #55
@@ -0,0 +1,108 @@
|
||||
# Per-bottle sidecar bundle image (PRD 0024).
|
||||
#
|
||||
# Collapses the four prior per-sidecar images (pipelock, egress,
|
||||
# git-gate, supervise) into one. A small stdlib-Python init
|
||||
# supervisor at /app/sidecar_init.py spawns all four daemons,
|
||||
# forwards SIGTERM, and propagates per-daemon stdout/stderr to the
|
||||
# container log with a `[name]` prefix. See PRD 0024 for the
|
||||
# rationale.
|
||||
#
|
||||
# Layout (preserved verbatim from the prior four Dockerfiles so the
|
||||
# compose renderer's bind-mount paths and docker-cp targets keep
|
||||
# working):
|
||||
#
|
||||
# /usr/local/bin/pipelock pipelock binary
|
||||
# /usr/bin/gitleaks gitleaks binary
|
||||
# /app/egress_addon.py + siblings mitmproxy addon (egress)
|
||||
# /app/egress-entrypoint.sh mitmdump launcher
|
||||
# /app/supervise_server.py + .py supervise MCP server
|
||||
# /app/sidecar_init.py PID 1 supervisor
|
||||
# /etc/pipelock.yaml bind-mounted at run time
|
||||
# /etc/egress/routes.yaml bind-mounted at run time
|
||||
# /etc/git-gate/pre-receive docker-cp'd at start time
|
||||
# /git-gate-entrypoint.sh docker-cp'd at start time
|
||||
# /git-gate/creds/* docker-cp'd at start time
|
||||
# /git/* bare repos, populated at runtime
|
||||
# /run/supervise/queue/ bind-mounted at run time
|
||||
# /home/mitmproxy/.mitmproxy/ mitmproxy CA dir
|
||||
#
|
||||
# Exposed ports inside the container:
|
||||
# 8888 pipelock (HTTPS_PROXY)
|
||||
# 9099 egress (mitmproxy, pipelock's upstream — not externally
|
||||
# addressed by the agent)
|
||||
# 9418 git-gate (git-daemon)
|
||||
# 9100 supervise (MCP HTTP)
|
||||
|
||||
# Stage 1: pipelock binary. The upstream pipelock image is a
|
||||
# scratch image with the binary at /pipelock (entrypoint).
|
||||
# Pinned by digest in lockstep with
|
||||
# claude_bottle/backend/docker/pipelock.py:PIPELOCK_IMAGE.
|
||||
FROM ghcr.io/luckypipewrench/pipelock@sha256:3b1a39417b98406ddc5dc2d8fcb42865ddc0c68a43d355db55f0f8cb06bc6de9 AS pipelock-src
|
||||
|
||||
# Stage 2: gitleaks binary. The upstream gitleaks image is alpine
|
||||
# with the binary at /usr/bin/gitleaks. Pinned by digest in lockstep
|
||||
# with Dockerfile.git-gate's prior base (now deleted at chunk 3).
|
||||
FROM zricethezav/gitleaks@sha256:c00b6bd0aeb3071cbcb79009cb16a60dd9e0a7c60e2be9ab65d25e6bc8abbb7f AS gitleaks-src
|
||||
|
||||
# Stage 3: assembly. mitmproxy/mitmproxy is debian-slim-based with
|
||||
# Python + mitmdump pre-installed — heavier than the others, so
|
||||
# this stage starts there and pulls the standalone binaries in.
|
||||
FROM mitmproxy/mitmproxy:11.1.3
|
||||
|
||||
# Run as root inside the bundle. The bundle is the isolation
|
||||
# boundary; per-daemon user separation inside it is not load-bearing
|
||||
# and complicates the supervisor's spawn path.
|
||||
USER root
|
||||
|
||||
# Runtime system deps:
|
||||
# git supplies the `git daemon` subcommand (no separate package)
|
||||
# plus the core `git` binary the pre-receive hook invokes.
|
||||
# openssh-client supplies the upstream SSH transport the
|
||||
# pre-receive hook uses to forward accepted refs.
|
||||
# ca-certificates is needed for both pipelock and mitmdump
|
||||
# upstream TLS (the base image already has it; listed for
|
||||
# explicitness).
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
git openssh-client ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Pull the standalone binaries into the final image.
|
||||
COPY --from=pipelock-src /pipelock /usr/local/bin/pipelock
|
||||
COPY --from=gitleaks-src /usr/bin/gitleaks /usr/bin/gitleaks
|
||||
|
||||
# Project Python: addon + server modules + the init supervisor.
|
||||
# Kept flat under /app/ so mitmdump's loader resolves them as
|
||||
# top-level siblings (absolute imports), matching the prior
|
||||
# Dockerfile.egress / Dockerfile.supervise layout.
|
||||
COPY claude_bottle/egress_addon_core.py /app/egress_addon_core.py
|
||||
COPY claude_bottle/egress_addon.py /app/egress_addon.py
|
||||
COPY claude_bottle/yaml_subset.py /app/yaml_subset.py
|
||||
COPY claude_bottle/supervise.py /app/supervise.py
|
||||
COPY claude_bottle/supervise_server.py /app/supervise_server.py
|
||||
COPY claude_bottle/sidecar_init.py /app/sidecar_init.py
|
||||
COPY claude_bottle/egress_entrypoint.sh /app/egress-entrypoint.sh
|
||||
RUN chmod +x /app/egress-entrypoint.sh
|
||||
|
||||
# Pre-create runtime directories the compose renderer + start
|
||||
# step expect to exist. `docker cp` does not create intermediate
|
||||
# dirs, and bind mounts won't either if the parent is missing.
|
||||
RUN mkdir -p \
|
||||
/etc/egress \
|
||||
/etc/git-gate \
|
||||
/git-gate/creds \
|
||||
/git \
|
||||
/run/supervise/queue \
|
||||
/home/mitmproxy/.mitmproxy
|
||||
|
||||
# Documentation only — the compose renderer publishes whichever
|
||||
# subset the bottle uses.
|
||||
EXPOSE 8888 9099 9418 9100
|
||||
|
||||
# WORKDIR matches Dockerfile.supervise's prior layout so the
|
||||
# in-app same-dir import in supervise_server.py stays deterministic.
|
||||
WORKDIR /app
|
||||
|
||||
# PID 1 is the supervisor. It owns signal handling and exit-code
|
||||
# propagation; no `exec` chain in the entrypoint itself.
|
||||
ENTRYPOINT ["python3", "/app/sidecar_init.py"]
|
||||
@@ -0,0 +1,35 @@
|
||||
#!/bin/sh
|
||||
# Egress daemon entrypoint inside the sidecar bundle (PRD 0024).
|
||||
#
|
||||
# Extracted verbatim from Dockerfile.egress's prior inline `sh -c`
|
||||
# ENTRYPOINT so the supervisor in claude_bottle/sidecar_init.py can
|
||||
# call it as a normal child. Behavior is unchanged:
|
||||
#
|
||||
# * Upstream proxy: when EGRESS_UPSTREAM_PROXY is set, switch
|
||||
# to `--mode upstream:URL` to forward all post-MITM traffic
|
||||
# through pipelock. mitmproxy does NOT honor HTTPS_PROXY on
|
||||
# its outbound side, so the upstream wiring has to be the
|
||||
# mitmproxy mode flag, not env.
|
||||
# * Upstream trust: when EGRESS_UPSTREAM_CA is set, build a
|
||||
# combined trust bundle (system roots + pipelock CA) and point
|
||||
# mitmproxy at it. The option REPLACES mitmproxy's default
|
||||
# trust store, so passing pipelock's CA alone would break
|
||||
# pipelock-passthrough hosts (api.anthropic.com etc.).
|
||||
# * `-s /app/egress_addon.py` loads the addon that reads
|
||||
# /etc/egress/routes.yaml.
|
||||
|
||||
set -e
|
||||
|
||||
MODE="--mode regular@9099"
|
||||
if [ -n "$EGRESS_UPSTREAM_PROXY" ]; then
|
||||
MODE="--mode upstream:$EGRESS_UPSTREAM_PROXY --listen-port 9099"
|
||||
fi
|
||||
|
||||
TRUST_FLAG=""
|
||||
if [ -n "$EGRESS_UPSTREAM_CA" ] && [ -f "$EGRESS_UPSTREAM_CA" ]; then
|
||||
COMBINED=/home/mitmproxy/.mitmproxy/combined-trust.pem
|
||||
cat /etc/ssl/certs/ca-certificates.crt "$EGRESS_UPSTREAM_CA" > "$COMBINED"
|
||||
TRUST_FLAG="--set ssl_verify_upstream_trusted_ca=$COMBINED"
|
||||
fi
|
||||
|
||||
exec mitmdump $MODE $TRUST_FLAG -s /app/egress_addon.py
|
||||
@@ -0,0 +1,219 @@
|
||||
"""Per-bottle sidecar supervisor (PRD 0024 chunk 1).
|
||||
|
||||
PID 1 inside the `claude-bottle-sidecars` bundle image. Spawns
|
||||
the configured daemons (egress, pipelock, git-gate, supervise),
|
||||
forwards SIGTERM/SIGINT to each child, and propagates per-daemon
|
||||
stdout+stderr to the container log with a `[name] ` prefix.
|
||||
|
||||
Failure policy (interim): when a child dies unexpectedly, the
|
||||
supervisor logs the death and leaves the surviving children
|
||||
running. The bundle stays up; whatever the dead daemon served
|
||||
will start failing, surfacing in the agent's own error path.
|
||||
The supervisor itself exits only when (a) the operator/compose
|
||||
sends SIGTERM/SIGINT, or (b) every child has died.
|
||||
|
||||
Failure policy (eventual): on unexpected death, the supervisor
|
||||
restarts the daemon and emits a notification to the supervise
|
||||
sidecar so the operator sees the event. That lands in a later
|
||||
PR; the interim policy is "don't take the bundle down for one
|
||||
sick daemon."
|
||||
|
||||
Daemon subset is env-driven. The compose renderer narrows it via
|
||||
`CLAUDE_BOTTLE_SIDECAR_DAEMONS=egress,pipelock` for bottles that
|
||||
don't use git-gate or supervise. Default: all four.
|
||||
|
||||
Stdlib-only by design — adding supervisord/s6/runit for four
|
||||
daemons is heavier than this script.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import IO, Sequence
|
||||
|
||||
|
||||
# Below compose's default 10s `stop_grace_period`. After this many
|
||||
# seconds past SIGTERM, escalate to SIGKILL on any still-running
|
||||
# child.
|
||||
_GRACE_SECONDS = 8.0
|
||||
|
||||
# Tight enough that exits and signals propagate without lag; loose
|
||||
# enough that the main loop isn't a CPU hog.
|
||||
_POLL_INTERVAL = 0.1
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class _DaemonSpec:
|
||||
name: str
|
||||
argv: Sequence[str]
|
||||
|
||||
|
||||
# Order matters only for first-launch race-window reasons: egress
|
||||
# starts first so pipelock's upstream connect succeeds during
|
||||
# pipelock's own startup. git-gate and supervise are independent.
|
||||
_DAEMONS: tuple[_DaemonSpec, ...] = (
|
||||
_DaemonSpec("egress", ("/bin/sh", "/app/egress-entrypoint.sh")),
|
||||
_DaemonSpec(
|
||||
"pipelock",
|
||||
("/usr/local/bin/pipelock", "run", "--config", "/etc/pipelock.yaml"),
|
||||
),
|
||||
_DaemonSpec("git-gate", ("/bin/sh", "/git-gate-entrypoint.sh")),
|
||||
_DaemonSpec("supervise", ("python3", "/app/supervise_server.py")),
|
||||
)
|
||||
|
||||
|
||||
def _selected_daemons(
|
||||
env: dict[str, str],
|
||||
all_daemons: Sequence[_DaemonSpec] | None = None,
|
||||
) -> tuple[_DaemonSpec, ...]:
|
||||
"""Filter the daemon set by the CLAUDE_BOTTLE_SIDECAR_DAEMONS env
|
||||
var. Unknown names in the list are ignored — the renderer is the
|
||||
source of truth for which daemons are wired.
|
||||
|
||||
`all_daemons` defaults to `_DAEMONS` resolved at call time (not
|
||||
at definition time), so tests can monkey-patch the module-level
|
||||
`_DAEMONS` and have the new value take effect."""
|
||||
if all_daemons is None:
|
||||
all_daemons = _DAEMONS
|
||||
raw = env.get("CLAUDE_BOTTLE_SIDECAR_DAEMONS", "").strip()
|
||||
if not raw:
|
||||
return tuple(all_daemons)
|
||||
wanted = {n.strip() for n in raw.split(",") if n.strip()}
|
||||
return tuple(d for d in all_daemons if d.name in wanted)
|
||||
|
||||
|
||||
def _log(msg: str) -> None:
|
||||
sys.stdout.write(f"sidecar-init: {msg}\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def _pump(name: str, stream: IO[bytes]) -> None:
|
||||
"""Read lines from `stream`, prefix with `[name]`, write to
|
||||
stdout. Runs in its own thread per child; daemon=True so a
|
||||
blocked read doesn't keep the process alive after main exits."""
|
||||
for raw in iter(stream.readline, b""):
|
||||
line = raw.decode("utf-8", errors="replace").rstrip("\n")
|
||||
sys.stdout.write(f"[{name}] {line}\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def _spawn(spec: _DaemonSpec) -> subprocess.Popen:
|
||||
proc = subprocess.Popen(
|
||||
list(spec.argv),
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
bufsize=0,
|
||||
)
|
||||
threading.Thread(
|
||||
target=_pump, args=(spec.name, proc.stdout), daemon=True
|
||||
).start()
|
||||
return proc
|
||||
|
||||
|
||||
class _Supervisor:
|
||||
"""Holds the running children + shutdown state. Pulled out so
|
||||
the test suite can drive it with fake commands."""
|
||||
|
||||
def __init__(self, specs: Sequence[_DaemonSpec]):
|
||||
self.specs = tuple(specs)
|
||||
self.procs: list[tuple[_DaemonSpec, subprocess.Popen]] = []
|
||||
self.shutdown_at: float | None = None
|
||||
# Names of children that have been logged as having exited
|
||||
# so we only log each death once across watch-loop ticks.
|
||||
self._logged_dead: set[str] = set()
|
||||
|
||||
def start_all(self) -> None:
|
||||
for spec in self.specs:
|
||||
_log(f"starting {spec.name}")
|
||||
self.procs.append((spec, _spawn(spec)))
|
||||
|
||||
def request_shutdown(self, reason: str) -> None:
|
||||
if self.shutdown_at is not None:
|
||||
return
|
||||
self.shutdown_at = time.monotonic()
|
||||
_log(f"shutting down ({reason}); forwarding SIGTERM")
|
||||
for _, p in self.procs:
|
||||
if p.poll() is None:
|
||||
try:
|
||||
p.terminate()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
|
||||
def tick(self) -> bool:
|
||||
"""One iteration of the watch loop. Returns True when every
|
||||
child has exited and the supervisor can return.
|
||||
|
||||
A child dying unexpectedly is logged but does NOT initiate
|
||||
shutdown — see the module docstring's failure-policy
|
||||
section. Shutdown is signal-driven only."""
|
||||
for spec, p in self.procs:
|
||||
rc = p.poll()
|
||||
if rc is None or spec.name in self._logged_dead:
|
||||
continue
|
||||
self._logged_dead.add(spec.name)
|
||||
if self.shutdown_at is None:
|
||||
_log(
|
||||
f"{spec.name} exited with code {rc}; leaving "
|
||||
f"surviving daemons running (operator-visible "
|
||||
f"via agent-side failure)"
|
||||
)
|
||||
else:
|
||||
_log(f"{spec.name} exited with code {rc}")
|
||||
|
||||
if self.shutdown_at is not None:
|
||||
elapsed = time.monotonic() - self.shutdown_at
|
||||
if elapsed > _GRACE_SECONDS:
|
||||
still_running = [
|
||||
spec.name for spec, p in self.procs if p.poll() is None
|
||||
]
|
||||
if still_running:
|
||||
_log(
|
||||
f"grace ({_GRACE_SECONDS:.0f}s) elapsed; SIGKILL on "
|
||||
f"{', '.join(still_running)}"
|
||||
)
|
||||
for _, p in self.procs:
|
||||
if p.poll() is None:
|
||||
try:
|
||||
p.kill()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
|
||||
return all(p.poll() is not None for _, p in self.procs)
|
||||
|
||||
def exit_code(self) -> int:
|
||||
"""Worst child returncode wins. On graceful shutdown every
|
||||
child is signal-killed (negative returncode) and max()
|
||||
returns 0; if some child crashed nonzero before the signal
|
||||
the operator gets that code on container exit."""
|
||||
return max((p.returncode for _, p in self.procs), default=0)
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
|
||||
del argv # no flags yet; env-driven only
|
||||
specs = _selected_daemons(dict(os.environ))
|
||||
if not specs:
|
||||
_log("no daemons selected; nothing to do")
|
||||
return 0
|
||||
|
||||
sup = _Supervisor(specs)
|
||||
sup.start_all()
|
||||
|
||||
signal.signal(signal.SIGTERM, lambda *_: sup.request_shutdown("SIGTERM"))
|
||||
signal.signal(signal.SIGINT, lambda *_: sup.request_shutdown("SIGINT"))
|
||||
|
||||
while not sup.tick():
|
||||
time.sleep(_POLL_INTERVAL)
|
||||
|
||||
rc = sup.exit_code()
|
||||
_log(f"exit {rc}")
|
||||
return rc
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -390,17 +390,21 @@ rewrite.
|
||||
## Open questions
|
||||
|
||||
1. **Init failure semantics.** When one daemon crashes mid-run,
|
||||
should the bundle exit (killing the bottle) or restart just
|
||||
that daemon? Today, with four separate containers, docker
|
||||
restarts the crashed one and the bottle stays up. Default
|
||||
for this PRD: bundle exits on any child death; the bottle
|
||||
tears down. Restart logic can land later if operators hit
|
||||
it in practice.
|
||||
2. **Exit-code propagation.** If multiple daemons die in quick
|
||||
succession (likely under SIGTERM), which exit code wins?
|
||||
First-to-die is simplest. Worst-case (highest nonzero
|
||||
exit code) gives clearest signal in logs. Default to
|
||||
first-to-die unless an operator scenario disagrees.
|
||||
the bundle does NOT tear down the survivors — the failure is
|
||||
logged, the surviving daemons keep running, and whatever the
|
||||
dead one served starts failing in a way the agent surfaces.
|
||||
The eventual design is restart-the-dead-daemon plus a
|
||||
notification to the supervise sidecar so the operator sees
|
||||
the event explicitly; chunk 1 ships only the "log and leave
|
||||
alone" half. Tear-down-the-bundle was considered and
|
||||
rejected: one sick daemon shouldn't take the bottle offline.
|
||||
2. **Exit-code propagation.** When the supervisor finally exits
|
||||
(signal-driven shutdown, or every child having died on its
|
||||
own), the container exits with `max(child returncodes)` —
|
||||
the worst nonzero code wins. On graceful shutdown every child
|
||||
is signal-killed (negative returncode) so the max is 0; a
|
||||
crashed-before-signal daemon's nonzero code wins and reaches
|
||||
the operator on container exit.
|
||||
3. **Image pin policy.** Pin `claude-bottle-sidecars` by tag
|
||||
(`:latest` rebuilt per-release) or by digest written into a
|
||||
`CLAUDE_BOTTLE_SIDECAR_IMAGE` env var like the existing
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
"""Integration: PRD 0024 chunk 1 — the sidecar bundle image builds
|
||||
and the four daemon binaries are present + executable inside it.
|
||||
|
||||
This test does NOT exercise the daemons running against real
|
||||
config (pipelock.yaml, routes.yaml, etc) — that lands in chunk 2
|
||||
when the renderer wires the bundle into compose. What we verify
|
||||
here is the chunk-1 contract:
|
||||
|
||||
- Dockerfile.sidecars builds (multi-stage works, base layers
|
||||
pull, COPYs resolve).
|
||||
- pipelock, gitleaks, mitmdump are at the documented paths and
|
||||
answer `--version`.
|
||||
- The Python init at /app/sidecar_init.py runs and prints the
|
||||
expected "no daemons selected" line when the supervisor is
|
||||
pointed at an empty daemon set.
|
||||
|
||||
Skips cleanly when docker is unavailable, or under act_runner
|
||||
where the host bind-mount topology breaks multi-stage builds
|
||||
that pull large bases.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import unittest
|
||||
|
||||
from tests._docker import skip_unless_docker
|
||||
|
||||
|
||||
_IMAGE = "claude-bottle-sidecars-test:chunk1"
|
||||
_DOCKERFILE = "Dockerfile.sidecars"
|
||||
|
||||
|
||||
@skip_unless_docker()
|
||||
@unittest.skipIf(
|
||||
os.environ.get("GITEA_ACTIONS") == "true",
|
||||
"skipped under act_runner: multi-stage build pulls a 200+MB "
|
||||
"mitmproxy base + two upstream sidecar images; runner storage "
|
||||
"+ time budget make this an interactive-only test",
|
||||
)
|
||||
class TestSidecarBundleImage(unittest.TestCase):
|
||||
"""Builds the image once for the class, then runs a few
|
||||
`docker run` probes against it."""
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls) -> None:
|
||||
repo_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
||||
proc = subprocess.run(
|
||||
["docker", "build", "-t", _IMAGE,
|
||||
"-f", _DOCKERFILE, "."],
|
||||
cwd=repo_root,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
raise unittest.SkipTest(
|
||||
f"docker build failed; skipping image probes.\n"
|
||||
f"{proc.stdout.decode('utf-8', errors='replace')[-2000:]}"
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls) -> None:
|
||||
subprocess.run(
|
||||
["docker", "image", "rm", "-f", _IMAGE],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
def _run_in_image(self, *cmd: str, timeout: float = 30.0) -> tuple[int, str]:
|
||||
proc = subprocess.run(
|
||||
["docker", "run", "--rm", "--entrypoint", cmd[0], _IMAGE,
|
||||
*cmd[1:]],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
timeout=timeout,
|
||||
)
|
||||
return proc.returncode, proc.stdout.decode("utf-8", errors="replace")
|
||||
|
||||
def test_pipelock_binary_present_and_versioned(self):
|
||||
rc, out = self._run_in_image("/usr/local/bin/pipelock", "version")
|
||||
self.assertEqual(0, rc, msg=out)
|
||||
self.assertIn("pipelock version", out)
|
||||
|
||||
def test_gitleaks_binary_present_and_versioned(self):
|
||||
rc, out = self._run_in_image("/usr/bin/gitleaks", "version")
|
||||
self.assertEqual(0, rc, msg=out)
|
||||
# gitleaks prints a bare version string like "v8.x.y".
|
||||
self.assertRegex(out, r"v?\d+\.\d+")
|
||||
|
||||
def test_mitmdump_binary_present_and_versioned(self):
|
||||
rc, out = self._run_in_image("mitmdump", "--version")
|
||||
self.assertEqual(0, rc, msg=out)
|
||||
self.assertIn("Mitmproxy", out)
|
||||
|
||||
def test_python_imports_supervise_module(self):
|
||||
# The bundle's supervise daemon imports `supervise` as a
|
||||
# same-directory sibling of `supervise_server`. Probe the
|
||||
# import resolves with `python3 -c` from /app (the
|
||||
# Dockerfile's WORKDIR).
|
||||
rc, out = self._run_in_image(
|
||||
"python3", "-c",
|
||||
"import supervise; import supervise_server; print('ok')",
|
||||
)
|
||||
self.assertEqual(0, rc, msg=out)
|
||||
self.assertIn("ok", out)
|
||||
|
||||
def test_init_supervisor_runs_with_no_daemons(self):
|
||||
# `nothing` matches no canonical daemon → supervisor exits 0
|
||||
# immediately with the documented message. Confirms the
|
||||
# ENTRYPOINT wiring works.
|
||||
proc = subprocess.run(
|
||||
["docker", "run", "--rm",
|
||||
"-e", "CLAUDE_BOTTLE_SIDECAR_DAEMONS=nothing",
|
||||
_IMAGE],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
timeout=10.0,
|
||||
)
|
||||
out = proc.stdout.decode("utf-8", errors="replace")
|
||||
self.assertEqual(0, proc.returncode, msg=out)
|
||||
self.assertIn("no daemons selected", out)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,299 @@
|
||||
"""Unit: sidecar bundle init supervisor (PRD 0024 chunk 1).
|
||||
|
||||
Tests both the helper functions in `claude_bottle.sidecar_init`
|
||||
and the supervisor's end-to-end signal / exit-code behavior. The
|
||||
end-to-end tests use real subprocesses (`/bin/sleep`,
|
||||
`/bin/sh -c '...'`) — short-lived, no docker required — so they
|
||||
run under `tests/unit/` rather than `tests/integration/`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from claude_bottle.sidecar_init import (
|
||||
_DaemonSpec,
|
||||
_Supervisor,
|
||||
_selected_daemons,
|
||||
)
|
||||
|
||||
|
||||
class TestSelectedDaemons(unittest.TestCase):
|
||||
"""Env-var subset filtering. The compose renderer is the source
|
||||
of truth for which daemons are wired; the supervisor just
|
||||
honors what it's told."""
|
||||
|
||||
_DAEMONS = (
|
||||
_DaemonSpec("egress", ("/bin/sh", "-c", ":")),
|
||||
_DaemonSpec("pipelock", ("/bin/sh", "-c", ":")),
|
||||
_DaemonSpec("git-gate", ("/bin/sh", "-c", ":")),
|
||||
_DaemonSpec("supervise", ("/bin/sh", "-c", ":")),
|
||||
)
|
||||
|
||||
def test_unset_returns_all(self):
|
||||
got = _selected_daemons({}, all_daemons=self._DAEMONS)
|
||||
self.assertEqual([d.name for d in got],
|
||||
["egress", "pipelock", "git-gate", "supervise"])
|
||||
|
||||
def test_empty_returns_all(self):
|
||||
got = _selected_daemons({"CLAUDE_BOTTLE_SIDECAR_DAEMONS": ""},
|
||||
all_daemons=self._DAEMONS)
|
||||
self.assertEqual(4, len(got))
|
||||
|
||||
def test_whitespace_only_returns_all(self):
|
||||
got = _selected_daemons({"CLAUDE_BOTTLE_SIDECAR_DAEMONS": " "},
|
||||
all_daemons=self._DAEMONS)
|
||||
self.assertEqual(4, len(got))
|
||||
|
||||
def test_explicit_subset(self):
|
||||
got = _selected_daemons(
|
||||
{"CLAUDE_BOTTLE_SIDECAR_DAEMONS": "egress,pipelock"},
|
||||
all_daemons=self._DAEMONS,
|
||||
)
|
||||
self.assertEqual([d.name for d in got], ["egress", "pipelock"])
|
||||
|
||||
def test_preserves_canonical_order(self):
|
||||
# Order in the env var doesn't matter; the result follows
|
||||
# the canonical _DAEMONS order so egress starts before
|
||||
# pipelock (race-window reason).
|
||||
got = _selected_daemons(
|
||||
{"CLAUDE_BOTTLE_SIDECAR_DAEMONS": "supervise,pipelock,egress"},
|
||||
all_daemons=self._DAEMONS,
|
||||
)
|
||||
self.assertEqual([d.name for d in got],
|
||||
["egress", "pipelock", "supervise"])
|
||||
|
||||
def test_unknown_names_ignored(self):
|
||||
got = _selected_daemons(
|
||||
{"CLAUDE_BOTTLE_SIDECAR_DAEMONS": "egress,bogus"},
|
||||
all_daemons=self._DAEMONS,
|
||||
)
|
||||
self.assertEqual([d.name for d in got], ["egress"])
|
||||
|
||||
def test_whitespace_in_names_stripped(self):
|
||||
got = _selected_daemons(
|
||||
{"CLAUDE_BOTTLE_SIDECAR_DAEMONS": " egress , pipelock "},
|
||||
all_daemons=self._DAEMONS,
|
||||
)
|
||||
self.assertEqual([d.name for d in got], ["egress", "pipelock"])
|
||||
|
||||
|
||||
class TestSupervisor(unittest.TestCase):
|
||||
"""End-to-end: drive `_Supervisor` directly with fake commands.
|
||||
We don't go through `main()` because main installs signal
|
||||
handlers process-wide, which collides with the test runner."""
|
||||
|
||||
def _drive(self, sup: _Supervisor, max_wait_s: float = 6.0) -> int:
|
||||
deadline = time.monotonic() + max_wait_s
|
||||
while not sup.tick():
|
||||
if time.monotonic() > deadline:
|
||||
self.fail("supervisor watch loop did not converge in time")
|
||||
time.sleep(0.05)
|
||||
return sup.exit_code()
|
||||
|
||||
def test_all_children_succeed_returns_zero(self):
|
||||
# `sh -c :` exits 0 immediately. With the new failure
|
||||
# policy a child dying doesn't trigger shutdown, so the
|
||||
# loop only converges once BOTH have exited on their own.
|
||||
# Both exit 0 → max(0, 0) = 0.
|
||||
specs = [
|
||||
_DaemonSpec("a", ("/bin/sh", "-c", ":")),
|
||||
_DaemonSpec("b", ("/bin/sh", "-c", ":")),
|
||||
]
|
||||
sup = _Supervisor(specs)
|
||||
sup.start_all()
|
||||
rc = self._drive(sup)
|
||||
self.assertEqual(0, rc)
|
||||
|
||||
def test_child_crash_does_not_initiate_shutdown(self):
|
||||
# Failure policy (PRD 0024, interim): a child dying
|
||||
# unexpectedly is logged but the supervisor does NOT tear
|
||||
# down the survivors. Verified by giving the crasher
|
||||
# ~0.3s to die, then asserting the long-runner is still
|
||||
# up and the supervisor never set shutdown_at.
|
||||
specs = [
|
||||
_DaemonSpec("crasher", ("/bin/sh", "-c", "exit 1")),
|
||||
_DaemonSpec("longrun", ("/bin/sleep", "30")),
|
||||
]
|
||||
sup = _Supervisor(specs)
|
||||
sup.start_all()
|
||||
# Drive ticks for a while; crasher should die, longrun
|
||||
# should survive.
|
||||
deadline = time.monotonic() + 1.0
|
||||
while time.monotonic() < deadline:
|
||||
done = sup.tick()
|
||||
self.assertFalse(done, "loop converged with a child still alive")
|
||||
if sup.procs[0][1].poll() is not None:
|
||||
break
|
||||
time.sleep(0.05)
|
||||
|
||||
self.assertEqual(1, sup.procs[0][1].returncode,
|
||||
"crasher should have exited 1")
|
||||
self.assertIsNone(sup.procs[1][1].poll(),
|
||||
"longrun should still be running")
|
||||
self.assertIsNone(sup.shutdown_at,
|
||||
"supervisor must not initiate shutdown on child death")
|
||||
|
||||
# Clean up — explicit signal-driven shutdown.
|
||||
sup.request_shutdown(reason="test-teardown")
|
||||
self._drive(sup)
|
||||
|
||||
def test_crash_then_signal_surfaces_nonzero_exit_code(self):
|
||||
# The crasher's exit code is what reaches the container
|
||||
# exit even though shutdown was triggered by SIGTERM.
|
||||
# exit_code() = max(child returncodes) → 1 wins over the
|
||||
# signal-killed longrun's negative returncode.
|
||||
specs = [
|
||||
_DaemonSpec("crasher", ("/bin/sh", "-c", "exit 1")),
|
||||
_DaemonSpec("longrun", ("/bin/sleep", "30")),
|
||||
]
|
||||
sup = _Supervisor(specs)
|
||||
sup.start_all()
|
||||
time.sleep(0.3) # let crasher die
|
||||
sup.request_shutdown(reason="test")
|
||||
rc = self._drive(sup)
|
||||
self.assertEqual(1, rc)
|
||||
|
||||
def test_all_children_die_unattended_loop_converges(self):
|
||||
# If nobody sends a signal but every child eventually
|
||||
# dies on its own, the supervisor still exits — nothing
|
||||
# left to supervise.
|
||||
specs = [
|
||||
_DaemonSpec("a", ("/bin/sh", "-c", "exit 0")),
|
||||
_DaemonSpec("b", ("/bin/sh", "-c", "exit 2")),
|
||||
]
|
||||
sup = _Supervisor(specs)
|
||||
sup.start_all()
|
||||
rc = self._drive(sup)
|
||||
self.assertEqual(2, rc)
|
||||
self.assertIsNone(sup.shutdown_at)
|
||||
|
||||
def test_shutdown_after_start_terminates_children(self):
|
||||
# Two long-running children. Caller requests shutdown;
|
||||
# both should receive SIGTERM and exit. exit_code() is
|
||||
# max of (returncodes) — both signal-killed (negative),
|
||||
# so max() picks 0 in the typical case (or the
|
||||
# platform-specific signal returncode).
|
||||
specs = [
|
||||
_DaemonSpec("a", ("/bin/sleep", "60")),
|
||||
_DaemonSpec("b", ("/bin/sleep", "60")),
|
||||
]
|
||||
sup = _Supervisor(specs)
|
||||
sup.start_all()
|
||||
time.sleep(0.2) # let them actually start
|
||||
sup.request_shutdown(reason="test")
|
||||
rc = self._drive(sup)
|
||||
self.assertIsNotNone(rc)
|
||||
# Both children got the signal — neither survived past
|
||||
# the grace deadline.
|
||||
for _, p in sup.procs:
|
||||
self.assertIsNotNone(p.returncode)
|
||||
|
||||
def test_grace_period_escalates_to_sigkill(self):
|
||||
# A child that ignores SIGTERM. The supervisor's
|
||||
# _GRACE_SECONDS is 8s globally; we patch it to 0.3 so the
|
||||
# test stays fast.
|
||||
ignore_term = (
|
||||
"/bin/sh", "-c",
|
||||
"trap '' TERM; sleep 30",
|
||||
)
|
||||
specs = [_DaemonSpec("stubborn", ignore_term)]
|
||||
sup = _Supervisor(specs)
|
||||
sup.start_all()
|
||||
time.sleep(0.3) # let `trap` register
|
||||
sup.request_shutdown(reason="test")
|
||||
|
||||
with patch("claude_bottle.sidecar_init._GRACE_SECONDS", 0.3):
|
||||
rc = self._drive(sup, max_wait_s=4.0)
|
||||
|
||||
# Process was SIGKILL'd → returncode -9 on POSIX.
|
||||
self.assertEqual(-9, sup.procs[0][1].returncode)
|
||||
self.assertIsNotNone(rc)
|
||||
|
||||
def test_idempotent_shutdown_requests(self):
|
||||
specs = [_DaemonSpec("a", ("/bin/sleep", "60"))]
|
||||
sup = _Supervisor(specs)
|
||||
sup.start_all()
|
||||
time.sleep(0.1)
|
||||
first_at = None
|
||||
sup.request_shutdown(reason="first")
|
||||
first_at = sup.shutdown_at
|
||||
# Second call must NOT reset the deadline (otherwise the
|
||||
# grace timer is gameable by a noisy signal).
|
||||
sup.request_shutdown(reason="second")
|
||||
self.assertEqual(first_at, sup.shutdown_at)
|
||||
self._drive(sup)
|
||||
|
||||
|
||||
class TestMainEndToEnd(unittest.TestCase):
|
||||
"""Run sidecar_init.py as a real subprocess to cover the
|
||||
signal-handler installation path. Skipped on platforms
|
||||
without /bin/sleep + /bin/sh."""
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
for p in ("/bin/sh", "/bin/sleep"):
|
||||
if not Path(p).exists():
|
||||
raise unittest.SkipTest(f"missing {p}")
|
||||
|
||||
def _run(self, daemons_csv: str, send_signal: int | None,
|
||||
wait_before_signal: float = 0.4,
|
||||
overall_timeout: float = 6.0) -> tuple[int, str]:
|
||||
"""Spawn sidecar_init.main() in a child process with the
|
||||
DAEMONS list patched to harmless `sleep 30` commands.
|
||||
Returns (returncode, captured stdout)."""
|
||||
|
||||
helper = (
|
||||
"import os, runpy, sys\n"
|
||||
"from claude_bottle import sidecar_init as si\n"
|
||||
"si._DAEMONS = (\n"
|
||||
" si._DaemonSpec('alpha', ('/bin/sleep','30')),\n"
|
||||
" si._DaemonSpec('beta', ('/bin/sleep','30')),\n"
|
||||
")\n"
|
||||
"sys.exit(si.main([]))\n"
|
||||
)
|
||||
env = {**os.environ, "CLAUDE_BOTTLE_SIDECAR_DAEMONS": daemons_csv}
|
||||
proc = subprocess.Popen(
|
||||
[sys.executable, "-c", helper],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
env=env,
|
||||
)
|
||||
try:
|
||||
if send_signal is not None:
|
||||
time.sleep(wait_before_signal)
|
||||
proc.send_signal(send_signal)
|
||||
out_b, _ = proc.communicate(timeout=overall_timeout)
|
||||
except subprocess.TimeoutExpired:
|
||||
proc.kill()
|
||||
out_b, _ = proc.communicate()
|
||||
self.fail("sidecar_init main() did not exit before timeout")
|
||||
return proc.returncode, out_b.decode("utf-8", errors="replace")
|
||||
|
||||
def test_sigterm_clean_shutdown(self):
|
||||
rc, out = self._run("alpha,beta", signal.SIGTERM)
|
||||
self.assertIn("starting alpha", out)
|
||||
self.assertIn("starting beta", out)
|
||||
self.assertIn("forwarding SIGTERM", out)
|
||||
# Sleep terminated by SIGTERM exits with returncode -15;
|
||||
# supervisor surfaces that via max(...) and main()
|
||||
# returns -15 → process exit becomes 256-15 = 241.
|
||||
# On macOS bash may convert to 143. Either way, nonzero
|
||||
# AND the child finished — we don't pin the exact code.
|
||||
self.assertNotEqual(0, rc)
|
||||
|
||||
def test_empty_daemon_set_exits_zero_immediately(self):
|
||||
# Use a sentinel value that filters out both alpha+beta.
|
||||
rc, out = self._run("nothing", send_signal=None,
|
||||
overall_timeout=2.0)
|
||||
self.assertEqual(0, rc)
|
||||
self.assertIn("no daemons selected", out)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user