From 3c2585cb9836bd9a27a84ddf71ad9603bfd37758 Mon Sep 17 00:00:00 2001 From: didericis Date: Tue, 26 May 2026 02:31:46 -0400 Subject: [PATCH] fix(apply): write routes/pipelock yaml in place, not via rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PRD 0018 chunk 3's atomicity fix used write-temp-then-rename to update bind-mounted config files. POSIX rename atomically swaps the inode at the host path — but Docker single-file bind mounts on Linux pin the source inode at mount time, so post-rename the container's mount points at the now-orphaned old inode and never sees the new content. The egress sidecar's SIGHUP-driven reload re-reads the same stale file → "egress route updates aren't updatable via the supervisor anymore". Switch egress_apply + pipelock_apply to write in place (same inode, truncated + rewritten). Lose file-level POSIX atomicity, but: - egress: SIGHUP fires only AFTER the write returns; the addon's `load_routes` raises `ValueError` on a partial read and keeps the previous in-memory routes, so the in-process race window (already narrow) is non-disruptive. - pipelock: applies via `docker restart` rather than SIGHUP; restart serializes after the host write completes, so the container reads the fully-written file on next boot. macOS Docker Desktop's file-sharing layer (virtiofs / osxfs) silently re-resolves the path on rename, which is why this bug didn't surface in dev tests on macOS. Linux native Docker is the strict reading; the fix works on both. --- claude_bottle/backend/docker/egress_apply.py | 65 ++++++++----------- .../backend/docker/pipelock_apply.py | 49 +++++--------- 2 files changed, 44 insertions(+), 70 deletions(-) diff --git a/claude_bottle/backend/docker/egress_apply.py b/claude_bottle/backend/docker/egress_apply.py index 3f17e34..72cf212 100644 --- a/claude_bottle/backend/docker/egress_apply.py +++ b/claude_bottle/backend/docker/egress_apply.py @@ -23,10 +23,8 @@ operator can retry. from __future__ import annotations import json -import os import re import subprocess -import tempfile from pathlib import Path from ...egress import EGRESS_ROUTES_IN_CONTAINER @@ -195,47 +193,36 @@ def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]: # and the operator gets a clear error about the half-state. _mirror_hosts_to_pipelock(slug, _hosts_in_routes(new_content)) - # PRD 0018 chunk 3 + security item (c): routes.yaml is bind- - # mounted into the egress container, so the write target is the - # host path the sidecar reads through the mount. POSIX - # rename-onto-self is atomic on the same filesystem, so a sidecar - # SIGHUP racing the apply can never observe a half-written file — - # it sees either the old bytes or the new ones. + # routes.yaml is bind-mounted into the egress container as a + # SINGLE FILE. Docker single-file bind mounts pin the source + # inode at mount time; write-temp-then-rename swaps the inode + # on the host, which leaves the container's mount pointing at + # the now-orphaned old inode (so the SIGHUP'd reload re-reads + # unchanged content). Write in-place instead. Lose file-level + # atomicity, but the apply path issues SIGHUP only AFTER the + # write returns, and the addon's `load_routes` raises + # `ValueError` on a partial read and keeps the previous + # in-memory routes — so a SIGHUP that hypothetically raced an + # in-flight write is non-disruptive. target = _egress_routes_host_path(slug) target.parent.mkdir(parents=True, exist_ok=True) - fd, tmp_path_str = tempfile.mkstemp( - prefix=".egress_routes.", suffix=".yaml.tmp", dir=str(target.parent), + target.write_text(new_content) + # mitmproxy in the container reads through the bind mount as + # uid 1000; the host file has to be world-readable for that + # read to succeed (parent dir at 0o700 still restricts who + # can reach the file on the host). Routes content is not + # secret — tokens live in the container's environ — so 0o644 + # is the right trade-off. + target.chmod(0o644) + sig = subprocess.run( + ["docker", "kill", "--signal", "HUP", container], + capture_output=True, text=True, check=False, ) - tmp_path = Path(tmp_path_str) - try: - with os.fdopen(fd, "w") as f: - f.write(new_content) - # mitmproxy in the container reads through the bind mount as - # uid 1000; the host file has to be world-readable for that - # read to succeed (parent dir at 0o700 still restricts who - # can reach the file on the host). Routes content is not - # secret — tokens live in the container's environ — so 0o644 - # is the right trade-off. - os.chmod(tmp_path, 0o644) - os.replace(tmp_path, target) - sig = subprocess.run( - ["docker", "kill", "--signal", "HUP", container], - capture_output=True, text=True, check=False, + if sig.returncode != 0: + raise EgressApplyError( + f"failed to SIGHUP {container}: " + f"{(sig.stderr or '').strip()}" ) - if sig.returncode != 0: - raise EgressApplyError( - f"failed to SIGHUP {container}: " - f"{(sig.stderr or '').strip()}" - ) - except BaseException: - # On any failure pre-rename, drop the tmp file. Post-rename - # there's nothing to clean up — `os.replace` is atomic so - # either the new file is in place or the old one still is. - try: - tmp_path.unlink() - except OSError: - pass - raise return before, new_content diff --git a/claude_bottle/backend/docker/pipelock_apply.py b/claude_bottle/backend/docker/pipelock_apply.py index fe2c4d0..95487cb 100644 --- a/claude_bottle/backend/docker/pipelock_apply.py +++ b/claude_bottle/backend/docker/pipelock_apply.py @@ -155,41 +155,28 @@ def apply_allowlist_change( cfg["api_allowlist"] = new_hosts rendered = pipelock_render_yaml(cfg) - # PRD 0018 chunk 3 + security item (c): pipelock.yaml is - # bind-mounted into the container, so the write target is the - # host path the sidecar reads. POSIX rename is atomic on the - # same filesystem, which matters less here than for the - # SIGHUP-reload egress case (pipelock fully restarts and - # re-reads on boot), but the pattern is uniform across both - # apply paths. + # pipelock.yaml is bind-mounted into the container as a SINGLE + # FILE — same Docker single-file inode issue as egress_apply: + # write-temp-then-rename swaps the host inode and leaves the + # container's mount pointing at the orphaned old one. Write + # in-place. `docker restart` below picks up the new content + # (and pipelock has no in-process reload anyway, so the + # restart is what makes it live regardless of write atomicity). target = _pipelock_yaml_host_path(slug) target.parent.mkdir(parents=True, exist_ok=True) - fd, tmp_path_str = tempfile.mkstemp( - prefix=".pipelock.", suffix=".yaml.tmp", dir=str(target.parent), + target.write_text(rendered) + # pipelock runs as root in its distroless image — any mode is + # fine — but 0o600 matches what prepare wrote. + target.chmod(0o600) + restart = subprocess.run( + ["docker", "restart", container], + capture_output=True, text=True, check=False, ) - tmp_path = Path(tmp_path_str) - try: - with os.fdopen(fd, "w") as f: - f.write(rendered) - # pipelock runs as root in its distroless image — any mode - # is fine — but 0o600 matches what prepare wrote. - os.chmod(tmp_path, 0o600) - os.replace(tmp_path, target) - restart = subprocess.run( - ["docker", "restart", container], - capture_output=True, text=True, check=False, + if restart.returncode != 0: + raise PipelockApplyError( + f"failed to restart {container}: " + f"{(restart.stderr or '').strip()}" ) - if restart.returncode != 0: - raise PipelockApplyError( - f"failed to restart {container}: " - f"{(restart.stderr or '').strip()}" - ) - except BaseException: - try: - tmp_path.unlink() - except OSError: - pass - raise return before, after