Merge pull request 'feat(sidecars): bundle image + init supervisor (PRD 0024 chunk 1)' (#55) from prd-0024-chunk-1-bundle-image into main
This commit was merged in pull request #55.
This commit is contained in:
@@ -0,0 +1,108 @@
|
|||||||
|
# Per-bottle sidecar bundle image (PRD 0024).
|
||||||
|
#
|
||||||
|
# Collapses the four prior per-sidecar images (pipelock, egress,
|
||||||
|
# git-gate, supervise) into one. A small stdlib-Python init
|
||||||
|
# supervisor at /app/sidecar_init.py spawns all four daemons,
|
||||||
|
# forwards SIGTERM, and propagates per-daemon stdout/stderr to the
|
||||||
|
# container log with a `[name]` prefix. See PRD 0024 for the
|
||||||
|
# rationale.
|
||||||
|
#
|
||||||
|
# Layout (preserved verbatim from the prior four Dockerfiles so the
|
||||||
|
# compose renderer's bind-mount paths and docker-cp targets keep
|
||||||
|
# working):
|
||||||
|
#
|
||||||
|
# /usr/local/bin/pipelock pipelock binary
|
||||||
|
# /usr/bin/gitleaks gitleaks binary
|
||||||
|
# /app/egress_addon.py + siblings mitmproxy addon (egress)
|
||||||
|
# /app/egress-entrypoint.sh mitmdump launcher
|
||||||
|
# /app/supervise_server.py + .py supervise MCP server
|
||||||
|
# /app/sidecar_init.py PID 1 supervisor
|
||||||
|
# /etc/pipelock.yaml bind-mounted at run time
|
||||||
|
# /etc/egress/routes.yaml bind-mounted at run time
|
||||||
|
# /etc/git-gate/pre-receive docker-cp'd at start time
|
||||||
|
# /git-gate-entrypoint.sh docker-cp'd at start time
|
||||||
|
# /git-gate/creds/* docker-cp'd at start time
|
||||||
|
# /git/* bare repos, populated at runtime
|
||||||
|
# /run/supervise/queue/ bind-mounted at run time
|
||||||
|
# /home/mitmproxy/.mitmproxy/ mitmproxy CA dir
|
||||||
|
#
|
||||||
|
# Exposed ports inside the container:
|
||||||
|
# 8888 pipelock (HTTPS_PROXY)
|
||||||
|
# 9099 egress (mitmproxy, pipelock's upstream — not externally
|
||||||
|
# addressed by the agent)
|
||||||
|
# 9418 git-gate (git-daemon)
|
||||||
|
# 9100 supervise (MCP HTTP)
|
||||||
|
|
||||||
|
# Stage 1: pipelock binary. The upstream pipelock image is a
# scratch image with the binary at /pipelock (entrypoint).
# Pinned by digest in lockstep with
# claude_bottle/backend/docker/pipelock.py:PIPELOCK_IMAGE.
FROM ghcr.io/luckypipewrench/pipelock@sha256:3b1a39417b98406ddc5dc2d8fcb42865ddc0c68a43d355db55f0f8cb06bc6de9 AS pipelock-src

# Stage 2: gitleaks binary. The upstream gitleaks image is alpine
# with the binary at /usr/bin/gitleaks. Pinned by digest in lockstep
# with Dockerfile.git-gate's prior base (now deleted at chunk 3).
FROM zricethezav/gitleaks@sha256:c00b6bd0aeb3071cbcb79009cb16a60dd9e0a7c60e2be9ab65d25e6bc8abbb7f AS gitleaks-src

# Stage 3: assembly. mitmproxy/mitmproxy is debian-slim-based with
# Python + mitmdump pre-installed — heavier than the others, so
# this stage starts there and pulls the standalone binaries in.
# NOTE(review): this base is pinned by tag only, while stages 1 and 2
# are pinned by digest. Consider appending the `@sha256:...` digest of
# 11.1.3 so all three inputs are equally reproducible.
FROM mitmproxy/mitmproxy:11.1.3 AS bundle

# Run as root inside the bundle. The bundle is the isolation
# boundary; per-daemon user separation inside it is not load-bearing
# and complicates the supervisor's spawn path.
USER root

# The supervisor captures every daemon's stdout through a pipe.
# Python block-buffers stdout when it is not a TTY, so without this
# the Python-based daemons (mitmdump, supervise_server.py) would hold
# log lines back until their buffer fills instead of surfacing them
# promptly in the container log.
ENV PYTHONUNBUFFERED=1

# Runtime system deps:
#   ca-certificates is needed for both pipelock and mitmdump
#     upstream TLS (the base image already has it; listed for
#     explicitness).
#   git supplies the `git daemon` subcommand (no separate package)
#     plus the core `git` binary the pre-receive hook invokes.
#   openssh-client supplies the upstream SSH transport the
#     pre-receive hook uses to forward accepted refs.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        openssh-client \
    && rm -rf /var/lib/apt/lists/*

# Pull the standalone binaries into the final image.
COPY --from=pipelock-src /pipelock /usr/local/bin/pipelock
COPY --from=gitleaks-src /usr/bin/gitleaks /usr/bin/gitleaks

# Project Python: addon + server modules + the init supervisor.
# Kept flat under /app/ so mitmdump's loader resolves them as
# top-level siblings (absolute imports), matching the prior
# Dockerfile.egress / Dockerfile.supervise layout.
COPY claude_bottle/egress_addon_core.py /app/egress_addon_core.py
COPY claude_bottle/egress_addon.py /app/egress_addon.py
COPY claude_bottle/yaml_subset.py /app/yaml_subset.py
COPY claude_bottle/supervise.py /app/supervise.py
COPY claude_bottle/supervise_server.py /app/supervise_server.py
COPY claude_bottle/sidecar_init.py /app/sidecar_init.py
# The executable bit is set at copy time (BuildKit `--chmod`) instead
# of in a follow-up `RUN chmod`, which would add a second layer
# carrying a duplicate of the file. Note the underscore -> hyphen
# rename between the source tree and the image path.
COPY --chmod=0755 claude_bottle/egress_entrypoint.sh /app/egress-entrypoint.sh

# Pre-create runtime directories the compose renderer + start
# step expect to exist. `docker cp` does not create intermediate
# dirs, and bind mounts won't either if the parent is missing.
RUN mkdir -p \
        /etc/egress \
        /etc/git-gate \
        /git-gate/creds \
        /git \
        /run/supervise/queue \
        /home/mitmproxy/.mitmproxy

# Documentation only — the compose renderer publishes whichever
# subset the bottle uses.
EXPOSE 8888 9099 9418 9100

# WORKDIR matches Dockerfile.supervise's prior layout so the
# in-app same-dir import in supervise_server.py stays deterministic.
WORKDIR /app

# PID 1 is the supervisor. It owns signal handling and exit-code
# propagation; no `exec` chain in the entrypoint itself.
ENTRYPOINT ["python3", "/app/sidecar_init.py"]
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Egress daemon entrypoint inside the sidecar bundle (PRD 0024).
|
||||||
|
#
|
||||||
|
# Extracted verbatim from Dockerfile.egress's prior inline `sh -c`
|
||||||
|
# ENTRYPOINT so the supervisor in claude_bottle/sidecar_init.py can
|
||||||
|
# call it as a normal child. Behavior is unchanged:
|
||||||
|
#
|
||||||
|
# * Upstream proxy: when EGRESS_UPSTREAM_PROXY is set, switch
|
||||||
|
# to `--mode upstream:URL` to forward all post-MITM traffic
|
||||||
|
# through pipelock. mitmproxy does NOT honor HTTPS_PROXY on
|
||||||
|
# its outbound side, so the upstream wiring has to be the
|
||||||
|
# mitmproxy mode flag, not env.
|
||||||
|
# * Upstream trust: when EGRESS_UPSTREAM_CA is set, build a
|
||||||
|
# combined trust bundle (system roots + pipelock CA) and point
|
||||||
|
# mitmproxy at it. The option REPLACES mitmproxy's default
|
||||||
|
# trust store, so passing pipelock's CA alone would break
|
||||||
|
# pipelock-passthrough hosts (api.anthropic.com etc.).
|
||||||
|
# * `-s /app/egress_addon.py` loads the addon that reads
|
||||||
|
# /etc/egress/routes.yaml.
|
||||||
|
|
||||||
|
set -e

# The mitmdump command line is accumulated in the positional
# parameters ("$@") rather than in flat strings, so every value
# derived from the environment stays exactly one argument: no word
# splitting and no glob expansion, whatever the URL or path contains.

# Default: plain forward proxy listening on 9099.
set -- --mode regular@9099
if [ -n "${EGRESS_UPSTREAM_PROXY:-}" ]; then
    # Chain all post-MITM traffic through the configured upstream.
    set -- --mode "upstream:$EGRESS_UPSTREAM_PROXY" --listen-port 9099
fi

if [ -n "${EGRESS_UPSTREAM_CA:-}" ]; then
    if [ -f "$EGRESS_UPSTREAM_CA" ]; then
        # The mitmproxy option replaces the default trust store, so the
        # system roots are concatenated with the upstream CA.
        COMBINED=/home/mitmproxy/.mitmproxy/combined-trust.pem
        cat /etc/ssl/certs/ca-certificates.crt "$EGRESS_UPSTREAM_CA" > "$COMBINED"
        set -- "$@" --set "ssl_verify_upstream_trusted_ca=$COMBINED"
    else
        # Previously a silent no-op. Still non-fatal, but say so: the
        # upstream TLS failures that follow are otherwise hard to trace.
        echo "egress-entrypoint: EGRESS_UPSTREAM_CA=$EGRESS_UPSTREAM_CA is not a regular file; using mitmproxy's default trust store" >&2
    fi
fi

# exec so mitmdump replaces this shell and receives the supervisor's
# signals directly.
exec mitmdump "$@" -s /app/egress_addon.py
|
||||||
@@ -0,0 +1,219 @@
|
|||||||
|
"""Per-bottle sidecar supervisor (PRD 0024 chunk 1).
|
||||||
|
|
||||||
|
PID 1 inside the `claude-bottle-sidecars` bundle image. Spawns
|
||||||
|
the configured daemons (egress, pipelock, git-gate, supervise),
|
||||||
|
forwards SIGTERM/SIGINT to each child, and propagates per-daemon
|
||||||
|
stdout+stderr to the container log with a `[name] ` prefix.
|
||||||
|
|
||||||
|
Failure policy (interim): when a child dies unexpectedly, the
|
||||||
|
supervisor logs the death and leaves the surviving children
|
||||||
|
running. The bundle stays up; whatever the dead daemon served
|
||||||
|
will start failing, surfacing in the agent's own error path.
|
||||||
|
The supervisor itself exits only when (a) the operator/compose
|
||||||
|
sends SIGTERM/SIGINT, or (b) every child has died.
|
||||||
|
|
||||||
|
Failure policy (eventual): on unexpected death, the supervisor
|
||||||
|
restarts the daemon and emits a notification to the supervise
|
||||||
|
sidecar so the operator sees the event. That lands in a later
|
||||||
|
PR; the interim policy is "don't take the bundle down for one
|
||||||
|
sick daemon."
|
||||||
|
|
||||||
|
Daemon subset is env-driven. The compose renderer narrows it via
|
||||||
|
`CLAUDE_BOTTLE_SIDECAR_DAEMONS=egress,pipelock` for bottles that
|
||||||
|
don't use git-gate or supervise. Default: all four.
|
||||||
|
|
||||||
|
Stdlib-only by design — adding supervisord/s6/runit for four
|
||||||
|
daemons is heavier than this script.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import IO, Sequence
|
||||||
|
|
||||||
|
|
||||||
|
# Shutdown grace window, in seconds. Kept below compose's default 10s
# `stop_grace_period`: once this many seconds have passed since the
# supervisor forwarded SIGTERM, any still-running child is escalated
# to SIGKILL.
_GRACE_SECONDS = 8.0

# Sleep between watch-loop iterations, in seconds. Tight enough that
# exits and signals propagate without lag; loose enough that the
# main loop isn't a CPU hog.
_POLL_INTERVAL = 0.1
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class _DaemonSpec:
    """Immutable description of one daemon the supervisor can launch."""

    # Short label: matched against CLAUDE_BOTTLE_SIDECAR_DAEMONS and
    # used as the `[name]` prefix on the daemon's forwarded output.
    name: str
    # Full command line (program followed by its arguments) handed to
    # subprocess.Popen when the daemon is spawned.
    argv: Sequence[str]
|
||||||
|
|
||||||
|
|
||||||
|
# Canonical launch order. It only matters for the first-launch race
# window: egress goes first so that pipelock's upstream connect
# succeeds while pipelock itself is starting. git-gate and supervise
# do not depend on the others.
_DAEMONS: tuple[_DaemonSpec, ...] = (
    _DaemonSpec(
        name="egress",
        argv=("/bin/sh", "/app/egress-entrypoint.sh"),
    ),
    _DaemonSpec(
        name="pipelock",
        argv=("/usr/local/bin/pipelock", "run", "--config", "/etc/pipelock.yaml"),
    ),
    _DaemonSpec(
        name="git-gate",
        argv=("/bin/sh", "/git-gate-entrypoint.sh"),
    ),
    _DaemonSpec(
        name="supervise",
        argv=("python3", "/app/supervise_server.py"),
    ),
)
|
||||||
|
|
||||||
|
|
||||||
|
def _selected_daemons(
|
||||||
|
env: dict[str, str],
|
||||||
|
all_daemons: Sequence[_DaemonSpec] | None = None,
|
||||||
|
) -> tuple[_DaemonSpec, ...]:
|
||||||
|
"""Filter the daemon set by the CLAUDE_BOTTLE_SIDECAR_DAEMONS env
|
||||||
|
var. Unknown names in the list are ignored — the renderer is the
|
||||||
|
source of truth for which daemons are wired.
|
||||||
|
|
||||||
|
`all_daemons` defaults to `_DAEMONS` resolved at call time (not
|
||||||
|
at definition time), so tests can monkey-patch the module-level
|
||||||
|
`_DAEMONS` and have the new value take effect."""
|
||||||
|
if all_daemons is None:
|
||||||
|
all_daemons = _DAEMONS
|
||||||
|
raw = env.get("CLAUDE_BOTTLE_SIDECAR_DAEMONS", "").strip()
|
||||||
|
if not raw:
|
||||||
|
return tuple(all_daemons)
|
||||||
|
wanted = {n.strip() for n in raw.split(",") if n.strip()}
|
||||||
|
return tuple(d for d in all_daemons if d.name in wanted)
|
||||||
|
|
||||||
|
|
||||||
|
def _log(msg: str) -> None:
|
||||||
|
sys.stdout.write(f"sidecar-init: {msg}\n")
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
|
def _pump(name: str, stream: IO[bytes]) -> None:
|
||||||
|
"""Read lines from `stream`, prefix with `[name]`, write to
|
||||||
|
stdout. Runs in its own thread per child; daemon=True so a
|
||||||
|
blocked read doesn't keep the process alive after main exits."""
|
||||||
|
for raw in iter(stream.readline, b""):
|
||||||
|
line = raw.decode("utf-8", errors="replace").rstrip("\n")
|
||||||
|
sys.stdout.write(f"[{name}] {line}\n")
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
|
def _spawn(spec: _DaemonSpec) -> subprocess.Popen:
    """Launch the daemon described by `spec` and start forwarding its output.

    The child's stderr is merged into its stdout, and that single
    unbuffered pipe is handed to a background daemon thread running
    `_pump`, which relays it under the spec's name. Returns the Popen
    handle of the started child.
    """
    child = subprocess.Popen(
        list(spec.argv),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=0,
    )
    forwarder = threading.Thread(
        target=_pump,
        args=(spec.name, child.stdout),
        daemon=True,
    )
    forwarder.start()
    return child
|
||||||
|
|
||||||
|
|
||||||
|
class _Supervisor:
|
||||||
|
"""Holds the running children + shutdown state. Pulled out so
|
||||||
|
the test suite can drive it with fake commands."""
|
||||||
|
|
||||||
|
def __init__(self, specs: Sequence[_DaemonSpec]):
|
||||||
|
self.specs = tuple(specs)
|
||||||
|
self.procs: list[tuple[_DaemonSpec, subprocess.Popen]] = []
|
||||||
|
self.shutdown_at: float | None = None
|
||||||
|
# Names of children that have been logged as having exited
|
||||||
|
# so we only log each death once across watch-loop ticks.
|
||||||
|
self._logged_dead: set[str] = set()
|
||||||
|
|
||||||
|
def start_all(self) -> None:
|
||||||
|
for spec in self.specs:
|
||||||
|
_log(f"starting {spec.name}")
|
||||||
|
self.procs.append((spec, _spawn(spec)))
|
||||||
|
|
||||||
|
def request_shutdown(self, reason: str) -> None:
|
||||||
|
if self.shutdown_at is not None:
|
||||||
|
return
|
||||||
|
self.shutdown_at = time.monotonic()
|
||||||
|
_log(f"shutting down ({reason}); forwarding SIGTERM")
|
||||||
|
for _, p in self.procs:
|
||||||
|
if p.poll() is None:
|
||||||
|
try:
|
||||||
|
p.terminate()
|
||||||
|
except ProcessLookupError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def tick(self) -> bool:
|
||||||
|
"""One iteration of the watch loop. Returns True when every
|
||||||
|
child has exited and the supervisor can return.
|
||||||
|
|
||||||
|
A child dying unexpectedly is logged but does NOT initiate
|
||||||
|
shutdown — see the module docstring's failure-policy
|
||||||
|
section. Shutdown is signal-driven only."""
|
||||||
|
for spec, p in self.procs:
|
||||||
|
rc = p.poll()
|
||||||
|
if rc is None or spec.name in self._logged_dead:
|
||||||
|
continue
|
||||||
|
self._logged_dead.add(spec.name)
|
||||||
|
if self.shutdown_at is None:
|
||||||
|
_log(
|
||||||
|
f"{spec.name} exited with code {rc}; leaving "
|
||||||
|
f"surviving daemons running (operator-visible "
|
||||||
|
f"via agent-side failure)"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
_log(f"{spec.name} exited with code {rc}")
|
||||||
|
|
||||||
|
if self.shutdown_at is not None:
|
||||||
|
elapsed = time.monotonic() - self.shutdown_at
|
||||||
|
if elapsed > _GRACE_SECONDS:
|
||||||
|
still_running = [
|
||||||
|
spec.name for spec, p in self.procs if p.poll() is None
|
||||||
|
]
|
||||||
|
if still_running:
|
||||||
|
_log(
|
||||||
|
f"grace ({_GRACE_SECONDS:.0f}s) elapsed; SIGKILL on "
|
||||||
|
f"{', '.join(still_running)}"
|
||||||
|
)
|
||||||
|
for _, p in self.procs:
|
||||||
|
if p.poll() is None:
|
||||||
|
try:
|
||||||
|
p.kill()
|
||||||
|
except ProcessLookupError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return all(p.poll() is not None for _, p in self.procs)
|
||||||
|
|
||||||
|
def exit_code(self) -> int:
|
||||||
|
"""Worst child returncode wins. On graceful shutdown every
|
||||||
|
child is signal-killed (negative returncode) and max()
|
||||||
|
returns 0; if some child crashed nonzero before the signal
|
||||||
|
the operator gets that code on container exit."""
|
||||||
|
return max((p.returncode for _, p in self.procs), default=0)
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: Sequence[str] | None = None) -> int:
    """Run the supervisor until all children are gone; return its exit code.

    Configuration comes from the process environment only; `argv` is
    accepted for signature compatibility and discarded. With no daemon
    selected the function logs that fact and returns 0 without
    spawning anything. Otherwise it starts the daemons, routes SIGTERM
    and SIGINT to the supervisor's shutdown request, and polls every
    `_POLL_INTERVAL` seconds until the watch loop reports completion.
    """
    del argv  # no flags yet; env-driven only

    chosen = _selected_daemons(dict(os.environ))
    if len(chosen) == 0:
        _log("no daemons selected; nothing to do")
        return 0

    sup = _Supervisor(chosen)
    sup.start_all()

    # The label is bound through a default argument so each handler
    # reports its own signal name.
    for signum, label in ((signal.SIGTERM, "SIGTERM"), (signal.SIGINT, "SIGINT")):
        signal.signal(
            signum,
            lambda *_, _label=label: sup.request_shutdown(_label),
        )

    while True:
        if sup.tick():
            break
        time.sleep(_POLL_INTERVAL)

    status = sup.exit_code()
    _log(f"exit {status}")
    return status
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
@@ -390,17 +390,21 @@ rewrite.
|
|||||||
## Open questions
|
## Open questions
|
||||||
|
|
||||||
1. **Init failure semantics.** When one daemon crashes mid-run,
|
1. **Init failure semantics.** When one daemon crashes mid-run,
|
||||||
should the bundle exit (killing the bottle) or restart just
|
the bundle does NOT tear down the survivors — the failure is
|
||||||
that daemon? Today, with four separate containers, docker
|
logged, the surviving daemons keep running, and whatever the
|
||||||
restarts the crashed one and the bottle stays up. Default
|
dead one served starts failing in a way the agent surfaces.
|
||||||
for this PRD: bundle exits on any child death; the bottle
|
The eventual design is restart-the-dead-daemon plus a
|
||||||
tears down. Restart logic can land later if operators hit
|
notification to the supervise sidecar so the operator sees
|
||||||
it in practice.
|
the event explicitly; chunk 1 ships only the "log and leave
|
||||||
2. **Exit-code propagation.** If multiple daemons die in quick
|
alone" half. Tear-down-the-bundle was considered and
|
||||||
succession (likely under SIGTERM), which exit code wins?
|
rejected: one sick daemon shouldn't take the bottle offline.
|
||||||
First-to-die is simplest. Worst-case (highest nonzero
|
2. **Exit-code propagation.** When the supervisor finally exits
|
||||||
exit code) gives clearest signal in logs. Default to
|
(signal-driven shutdown, or every child having died on its
|
||||||
first-to-die unless an operator scenario disagrees.
|
own), the container exits with `max(child returncodes)` —
|
||||||
|
the worst nonzero code wins. On graceful shutdown every child
|
||||||
|
is signal-killed (negative returncode) so the max is 0; a
|
||||||
|
crashed-before-signal daemon's nonzero code wins and reaches
|
||||||
|
the operator on container exit.
|
||||||
3. **Image pin policy.** Pin `claude-bottle-sidecars` by tag
|
3. **Image pin policy.** Pin `claude-bottle-sidecars` by tag
|
||||||
(`:latest` rebuilt per-release) or by digest written into a
|
(`:latest` rebuilt per-release) or by digest written into a
|
||||||
`CLAUDE_BOTTLE_SIDECAR_IMAGE` env var like the existing
|
`CLAUDE_BOTTLE_SIDECAR_IMAGE` env var like the existing
|
||||||
|
|||||||
@@ -0,0 +1,122 @@
|
|||||||
|
"""Integration: PRD 0024 chunk 1 — the sidecar bundle image builds
|
||||||
|
and the four daemon binaries are present + executable inside it.
|
||||||
|
|
||||||
|
This test does NOT exercise the daemons running against real
|
||||||
|
config (pipelock.yaml, routes.yaml, etc) — that lands in chunk 2
|
||||||
|
when the renderer wires the bundle into compose. What we verify
|
||||||
|
here is the chunk-1 contract:
|
||||||
|
|
||||||
|
- Dockerfile.sidecars builds (multi-stage works, base layers
|
||||||
|
pull, COPYs resolve).
|
||||||
|
- pipelock, gitleaks, mitmdump are at the documented paths and
|
||||||
|
answer `--version`.
|
||||||
|
- The Python init at /app/sidecar_init.py runs and prints the
|
||||||
|
expected "no daemons selected" line when the supervisor is
|
||||||
|
pointed at an empty daemon set.
|
||||||
|
|
||||||
|
Skips cleanly when docker is unavailable, or under act_runner
|
||||||
|
where the host bind-mount topology breaks multi-stage builds
|
||||||
|
that pull large bases.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from tests._docker import skip_unless_docker
|
||||||
|
|
||||||
|
|
||||||
|
_IMAGE = "claude-bottle-sidecars-test:chunk1"
|
||||||
|
_DOCKERFILE = "Dockerfile.sidecars"
|
||||||
|
|
||||||
|
|
||||||
|
@skip_unless_docker()
@unittest.skipIf(
    os.environ.get("GITEA_ACTIONS") == "true",
    "skipped under act_runner: multi-stage build pulls a 200+MB "
    "mitmproxy base + two upstream sidecar images; runner storage "
    "+ time budget make this an interactive-only test",
)
class TestSidecarBundleImage(unittest.TestCase):
    """Build the bundle image once per class, then probe it with
    short-lived `docker run` invocations."""

    @classmethod
    def setUpClass(cls) -> None:
        # The repository root is three directory levels above this file.
        repo_root = __file__
        for _ in range(3):
            repo_root = os.path.dirname(repo_root)

        build = subprocess.run(
            ["docker", "build", "-t", _IMAGE, "-f", _DOCKERFILE, "."],
            cwd=repo_root,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        if build.returncode == 0:
            return
        # A failed build skips the class rather than failing it; the
        # tail of the build log goes into the skip reason.
        log_tail = build.stdout.decode("utf-8", errors="replace")[-2000:]
        raise unittest.SkipTest(
            "docker build failed; skipping image probes.\n" + log_tail
        )

    @classmethod
    def tearDownClass(cls) -> None:
        # Best-effort removal of the test image; output is discarded.
        subprocess.run(
            ["docker", "image", "rm", "-f", _IMAGE],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

    def _run_in_image(self, *cmd: str, timeout: float = 30.0) -> tuple[int, str]:
        """Run `cmd` in a throwaway container with cmd[0] as the
        entrypoint; return (exit status, merged stdout+stderr text)."""
        docker_argv = ["docker", "run", "--rm", "--entrypoint", cmd[0], _IMAGE]
        docker_argv.extend(cmd[1:])
        done = subprocess.run(
            docker_argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            timeout=timeout,
        )
        text = done.stdout.decode("utf-8", errors="replace")
        return done.returncode, text

    def test_pipelock_binary_present_and_versioned(self):
        status, output = self._run_in_image("/usr/local/bin/pipelock", "version")
        self.assertEqual(0, status, msg=output)
        self.assertIn("pipelock version", output)

    def test_gitleaks_binary_present_and_versioned(self):
        status, output = self._run_in_image("/usr/bin/gitleaks", "version")
        self.assertEqual(0, status, msg=output)
        # gitleaks prints a bare version string like "v8.x.y".
        self.assertRegex(output, r"v?\d+\.\d+")

    def test_mitmdump_binary_present_and_versioned(self):
        status, output = self._run_in_image("mitmdump", "--version")
        self.assertEqual(0, status, msg=output)
        self.assertIn("Mitmproxy", output)

    def test_python_imports_supervise_module(self):
        # supervise_server imports `supervise` as a same-directory
        # sibling. Check that both resolve under `python3 -c` when run
        # from /app, the Dockerfile's WORKDIR.
        probe = "import supervise; import supervise_server; print('ok')"
        status, output = self._run_in_image("python3", "-c", probe)
        self.assertEqual(0, status, msg=output)
        self.assertIn("ok", output)

    def test_init_supervisor_runs_with_no_daemons(self):
        # `nothing` names no canonical daemon, so the supervisor should
        # print its documented message and exit 0 straight away. This
        # goes through the image's own ENTRYPOINT, confirming its wiring.
        docker_argv = [
            "docker", "run", "--rm",
            "-e", "CLAUDE_BOTTLE_SIDECAR_DAEMONS=nothing",
            _IMAGE,
        ]
        done = subprocess.run(
            docker_argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            timeout=10.0,
        )
        output = done.stdout.decode("utf-8", errors="replace")
        self.assertEqual(0, done.returncode, msg=output)
        self.assertIn("no daemons selected", output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
@@ -0,0 +1,299 @@
|
|||||||
|
"""Unit: sidecar bundle init supervisor (PRD 0024 chunk 1).
|
||||||
|
|
||||||
|
Tests both the helper functions in `claude_bottle.sidecar_init`
|
||||||
|
and the supervisor's end-to-end signal / exit-code behavior. The
|
||||||
|
end-to-end tests use real subprocesses (`/bin/sleep`,
|
||||||
|
`/bin/sh -c '...'`) — short-lived, no docker required — so they
|
||||||
|
run under `tests/unit/` rather than `tests/integration/`."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from claude_bottle.sidecar_init import (
|
||||||
|
_DaemonSpec,
|
||||||
|
_Supervisor,
|
||||||
|
_selected_daemons,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSelectedDaemons(unittest.TestCase):
    """Subset filtering driven by CLAUDE_BOTTLE_SIDECAR_DAEMONS. The
    compose renderer decides which daemons are wired; these tests pin
    that the supervisor simply follows the list it is given."""

    # Four stand-in specs in canonical order; each argv is a no-op shell.
    _DAEMONS = tuple(
        _DaemonSpec(name, ("/bin/sh", "-c", ":"))
        for name in ("egress", "pipelock", "git-gate", "supervise")
    )

    def _names_for(self, env):
        """Names selected for `env`, in the order returned."""
        picked = _selected_daemons(env, all_daemons=self._DAEMONS)
        return [spec.name for spec in picked]

    def test_unset_returns_all(self):
        self.assertEqual(
            self._names_for({}),
            ["egress", "pipelock", "git-gate", "supervise"],
        )

    def test_empty_returns_all(self):
        names = self._names_for({"CLAUDE_BOTTLE_SIDECAR_DAEMONS": ""})
        self.assertEqual(4, len(names))

    def test_whitespace_only_returns_all(self):
        names = self._names_for({"CLAUDE_BOTTLE_SIDECAR_DAEMONS": "   "})
        self.assertEqual(4, len(names))

    def test_explicit_subset(self):
        env = {"CLAUDE_BOTTLE_SIDECAR_DAEMONS": "egress,pipelock"}
        self.assertEqual(self._names_for(env), ["egress", "pipelock"])

    def test_preserves_canonical_order(self):
        # The env var's ordering is irrelevant: results come back in
        # canonical _DAEMONS order, keeping egress ahead of pipelock
        # (race-window reason).
        env = {"CLAUDE_BOTTLE_SIDECAR_DAEMONS": "supervise,pipelock,egress"}
        self.assertEqual(
            self._names_for(env),
            ["egress", "pipelock", "supervise"],
        )

    def test_unknown_names_ignored(self):
        env = {"CLAUDE_BOTTLE_SIDECAR_DAEMONS": "egress,bogus"}
        self.assertEqual(self._names_for(env), ["egress"])

    def test_whitespace_in_names_stripped(self):
        env = {"CLAUDE_BOTTLE_SIDECAR_DAEMONS": " egress , pipelock "}
        self.assertEqual(self._names_for(env), ["egress", "pipelock"])
|
||||||
|
|
||||||
|
|
||||||
|
class TestSupervisor(unittest.TestCase):
|
||||||
|
"""End-to-end: drive `_Supervisor` directly with fake commands.
|
||||||
|
We don't go through `main()` because main installs signal
|
||||||
|
handlers process-wide, which collides with the test runner."""
|
||||||
|
|
||||||
|
def _drive(self, sup: _Supervisor, max_wait_s: float = 6.0) -> int:
|
||||||
|
deadline = time.monotonic() + max_wait_s
|
||||||
|
while not sup.tick():
|
||||||
|
if time.monotonic() > deadline:
|
||||||
|
self.fail("supervisor watch loop did not converge in time")
|
||||||
|
time.sleep(0.05)
|
||||||
|
return sup.exit_code()
|
||||||
|
|
||||||
|
def test_all_children_succeed_returns_zero(self):
|
||||||
|
# `sh -c :` exits 0 immediately. With the new failure
|
||||||
|
# policy a child dying doesn't trigger shutdown, so the
|
||||||
|
# loop only converges once BOTH have exited on their own.
|
||||||
|
# Both exit 0 → max(0, 0) = 0.
|
||||||
|
specs = [
|
||||||
|
_DaemonSpec("a", ("/bin/sh", "-c", ":")),
|
||||||
|
_DaemonSpec("b", ("/bin/sh", "-c", ":")),
|
||||||
|
]
|
||||||
|
sup = _Supervisor(specs)
|
||||||
|
sup.start_all()
|
||||||
|
rc = self._drive(sup)
|
||||||
|
self.assertEqual(0, rc)
|
||||||
|
|
||||||
|
def test_child_crash_does_not_initiate_shutdown(self):
|
||||||
|
# Failure policy (PRD 0024, interim): a child dying
|
||||||
|
# unexpectedly is logged but the supervisor does NOT tear
|
||||||
|
# down the survivors. Verified by giving the crasher
|
||||||
|
# ~0.3s to die, then asserting the long-runner is still
|
||||||
|
# up and the supervisor never set shutdown_at.
|
||||||
|
specs = [
|
||||||
|
_DaemonSpec("crasher", ("/bin/sh", "-c", "exit 1")),
|
||||||
|
_DaemonSpec("longrun", ("/bin/sleep", "30")),
|
||||||
|
]
|
||||||
|
sup = _Supervisor(specs)
|
||||||
|
sup.start_all()
|
||||||
|
# Drive ticks for a while; crasher should die, longrun
|
||||||
|
# should survive.
|
||||||
|
deadline = time.monotonic() + 1.0
|
||||||
|
while time.monotonic() < deadline:
|
||||||
|
done = sup.tick()
|
||||||
|
self.assertFalse(done, "loop converged with a child still alive")
|
||||||
|
if sup.procs[0][1].poll() is not None:
|
||||||
|
break
|
||||||
|
time.sleep(0.05)
|
||||||
|
|
||||||
|
self.assertEqual(1, sup.procs[0][1].returncode,
|
||||||
|
"crasher should have exited 1")
|
||||||
|
self.assertIsNone(sup.procs[1][1].poll(),
|
||||||
|
"longrun should still be running")
|
||||||
|
self.assertIsNone(sup.shutdown_at,
|
||||||
|
"supervisor must not initiate shutdown on child death")
|
||||||
|
|
||||||
|
# Clean up — explicit signal-driven shutdown.
|
||||||
|
sup.request_shutdown(reason="test-teardown")
|
||||||
|
self._drive(sup)
|
||||||
|
|
||||||
|
def test_crash_then_signal_surfaces_nonzero_exit_code(self):
|
||||||
|
# The crasher's exit code is what reaches the container
|
||||||
|
# exit even though shutdown was triggered by SIGTERM.
|
||||||
|
# exit_code() = max(child returncodes) → 1 wins over the
|
||||||
|
# signal-killed longrun's negative returncode.
|
||||||
|
specs = [
|
||||||
|
_DaemonSpec("crasher", ("/bin/sh", "-c", "exit 1")),
|
||||||
|
_DaemonSpec("longrun", ("/bin/sleep", "30")),
|
||||||
|
]
|
||||||
|
sup = _Supervisor(specs)
|
||||||
|
sup.start_all()
|
||||||
|
time.sleep(0.3) # let crasher die
|
||||||
|
sup.request_shutdown(reason="test")
|
||||||
|
rc = self._drive(sup)
|
||||||
|
self.assertEqual(1, rc)
|
||||||
|
|
||||||
|
def test_all_children_die_unattended_loop_converges(self):
|
||||||
|
# If nobody sends a signal but every child eventually
|
||||||
|
# dies on its own, the supervisor still exits — nothing
|
||||||
|
# left to supervise.
|
||||||
|
specs = [
|
||||||
|
_DaemonSpec("a", ("/bin/sh", "-c", "exit 0")),
|
||||||
|
_DaemonSpec("b", ("/bin/sh", "-c", "exit 2")),
|
||||||
|
]
|
||||||
|
sup = _Supervisor(specs)
|
||||||
|
sup.start_all()
|
||||||
|
rc = self._drive(sup)
|
||||||
|
self.assertEqual(2, rc)
|
||||||
|
self.assertIsNone(sup.shutdown_at)
|
||||||
|
|
||||||
|
def test_shutdown_after_start_terminates_children(self):
    # Two long-running children. The caller requests shutdown and
    # both are expected to be signalled and to exit.
    #
    # The exact exit code is deliberately not pinned: both children
    # die from a signal, so the value depends on how exit_code() folds
    # signal-death return codes (negative on POSIX).
    # NOTE(review): confirm against sidecar_init.exit_code().
    specs = [
        _DaemonSpec("a", ("/bin/sleep", "60")),
        _DaemonSpec("b", ("/bin/sleep", "60")),
    ]
    sup = _Supervisor(specs)
    sup.start_all()
    time.sleep(0.2)  # let them actually start
    sup.request_shutdown(reason="test")
    rc = self._drive(sup)
    self.assertIsNotNone(rc)
    # Every child must have been reaped -- a returncode of None means
    # the process was still running after the drive loop finished.
    for entry, proc in sup.procs:
        self.assertIsNotNone(
            proc.returncode,
            f"child {entry!r} still running after shutdown",
        )
|
||||||
|
|
||||||
|
def test_grace_period_escalates_to_sigkill(self):
    # A child that ignores SIGTERM must be force-killed once the
    # grace period runs out. The supervisor's _GRACE_SECONDS is 8s
    # globally; we patch it to 0.3 so the test stays fast.
    ignore_term = (
        "/bin/sh", "-c",
        "trap '' TERM; sleep 30",
    )
    specs = [_DaemonSpec("stubborn", ignore_term)]
    sup = _Supervisor(specs)
    sup.start_all()
    time.sleep(0.3)  # let `trap` register

    # The shutdown request is issued inside the patch as well, so the
    # shortened grace period applies whether the supervisor reads
    # _GRACE_SECONDS when shutdown is requested or on each tick.
    with patch("claude_bottle.sidecar_init._GRACE_SECONDS", 0.3):
        sup.request_shutdown(reason="test")
        rc = self._drive(sup, max_wait_s=4.0)

    # Process was SIGKILL'd -> returncode is the negated signal
    # number (-9 on POSIX).
    self.assertEqual(
        -signal.SIGKILL, sup.procs[0][1].returncode,
        "stubborn child should have been SIGKILL'd after the grace period",
    )
|
||||||
|
self.assertIsNotNone(rc)
|
||||||
|
|
||||||
|
def test_idempotent_shutdown_requests(self):
    # A repeated shutdown request must leave the original shutdown
    # timestamp untouched.
    specs = [_DaemonSpec("a", ("/bin/sleep", "60"))]
    sup = _Supervisor(specs)
    sup.start_all()
    time.sleep(0.1)
    sup.request_shutdown(reason="first")
    try:
        first_at = sup.shutdown_at
        # Guard against a vacuous pass: if the first request never
        # recorded a timestamp, the equality check below would
        # compare None with None and succeed.
        self.assertIsNotNone(
            first_at, "first request_shutdown() must set shutdown_at")
        # Second call must NOT reset the deadline (otherwise the
        # grace timer is gameable by a noisy signal).
        sup.request_shutdown(reason="second")
        self.assertEqual(first_at, sup.shutdown_at)
    finally:
        # Always drive the supervisor to completion so the sleeping
        # child is not left behind when an assertion fails.
        self._drive(sup)
|
||||||
|
|
||||||
|
|
||||||
|
class TestMainEndToEnd(unittest.TestCase):
|
||||||
|
"""Run sidecar_init.py as a real subprocess to cover the
|
||||||
|
signal-handler installation path. Skipped on platforms
|
||||||
|
without /bin/sleep + /bin/sh."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
for p in ("/bin/sh", "/bin/sleep"):
|
||||||
|
if not Path(p).exists():
|
||||||
|
raise unittest.SkipTest(f"missing {p}")
|
||||||
|
|
||||||
|
def _run(self, daemons_csv: str, send_signal: int | None,
|
||||||
|
wait_before_signal: float = 0.4,
|
||||||
|
overall_timeout: float = 6.0) -> tuple[int, str]:
|
||||||
|
"""Spawn sidecar_init.main() in a child process with the
|
||||||
|
DAEMONS list patched to harmless `sleep 30` commands.
|
||||||
|
Returns (returncode, captured stdout)."""
|
||||||
|
|
||||||
|
helper = (
|
||||||
|
"import os, runpy, sys\n"
|
||||||
|
"from claude_bottle import sidecar_init as si\n"
|
||||||
|
"si._DAEMONS = (\n"
|
||||||
|
" si._DaemonSpec('alpha', ('/bin/sleep','30')),\n"
|
||||||
|
" si._DaemonSpec('beta', ('/bin/sleep','30')),\n"
|
||||||
|
")\n"
|
||||||
|
"sys.exit(si.main([]))\n"
|
||||||
|
)
|
||||||
|
env = {**os.environ, "CLAUDE_BOTTLE_SIDECAR_DAEMONS": daemons_csv}
|
||||||
|
proc = subprocess.Popen(
|
||||||
|
[sys.executable, "-c", helper],
|
||||||
|
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||||
|
env=env,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
if send_signal is not None:
|
||||||
|
time.sleep(wait_before_signal)
|
||||||
|
proc.send_signal(send_signal)
|
||||||
|
out_b, _ = proc.communicate(timeout=overall_timeout)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
proc.kill()
|
||||||
|
out_b, _ = proc.communicate()
|
||||||
|
self.fail("sidecar_init main() did not exit before timeout")
|
||||||
|
return proc.returncode, out_b.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
def test_sigterm_clean_shutdown(self):
|
||||||
|
rc, out = self._run("alpha,beta", signal.SIGTERM)
|
||||||
|
self.assertIn("starting alpha", out)
|
||||||
|
self.assertIn("starting beta", out)
|
||||||
|
self.assertIn("forwarding SIGTERM", out)
|
||||||
|
# Sleep terminated by SIGTERM exits with returncode -15;
|
||||||
|
# supervisor surfaces that via max(...) and main()
|
||||||
|
# returns -15 → process exit becomes 256-15 = 241.
|
||||||
|
# On macOS bash may convert to 143. Either way, nonzero
|
||||||
|
# AND the child finished — we don't pin the exact code.
|
||||||
|
self.assertNotEqual(0, rc)
|
||||||
|
|
||||||
|
def test_empty_daemon_set_exits_zero_immediately(self):
|
||||||
|
# Use a sentinel value that filters out both alpha+beta.
|
||||||
|
rc, out = self._run("nothing", send_signal=None,
|
||||||
|
overall_timeout=2.0)
|
||||||
|
self.assertEqual(0, rc)
|
||||||
|
self.assertIn("no daemons selected", out)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Reference in New Issue
Block a user