From 3c2585cb9836bd9a27a84ddf71ad9603bfd37758 Mon Sep 17 00:00:00 2001
From: didericis <eric@dideric.is>
Date: Tue, 26 May 2026 02:31:46 -0400
Subject: [PATCH] fix(apply): write routes/pipelock yaml in place, not via
 rename
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PRD 0018 chunk 3's atomicity fix used write-temp-then-rename to
update bind-mounted config files. POSIX rename atomically swaps
the inode at the host path — but Docker single-file bind mounts
on Linux pin the source inode at mount time, so post-rename the
container's mount points at the now-orphaned old inode and never
sees the new content. The egress sidecar's SIGHUP-driven reload
re-reads the same stale file → "egress route updates aren't
updatable via the supervisor anymore".

Switch egress_apply + pipelock_apply to write in place (same
inode, truncated + rewritten). Lose file-level POSIX atomicity,
but:

  - egress: SIGHUP fires only AFTER the write returns; the
    addon's `load_routes` raises `ValueError` on a partial read
    and keeps the previous in-memory routes, so the in-process
    race window (already narrow) is non-disruptive.
  - pipelock: applies via `docker restart` rather than SIGHUP;
    restart serializes after the host write completes, so the
    container reads the fully-written file on next boot.

macOS Docker Desktop's file-sharing layer (virtiofs / osxfs)
silently re-resolves the path on rename, which is why this bug
didn't surface in dev tests on macOS. Linux native Docker is
the strict reading; the fix works on both.
---
 claude_bottle/backend/docker/egress_apply.py  | 65 ++++++++-----------
 .../backend/docker/pipelock_apply.py          | 49 +++++---------
 2 files changed, 44 insertions(+), 70 deletions(-)

diff --git a/claude_bottle/backend/docker/egress_apply.py b/claude_bottle/backend/docker/egress_apply.py
index 3f17e34..72cf212 100644
--- a/claude_bottle/backend/docker/egress_apply.py
+++ b/claude_bottle/backend/docker/egress_apply.py
@@ -23,10 +23,8 @@ operator can retry.
 from __future__ import annotations
 
 import json
-import os
 import re
 import subprocess
-import tempfile
 from pathlib import Path
 
 from ...egress import EGRESS_ROUTES_IN_CONTAINER
@@ -195,47 +193,36 @@ def apply_routes_change(slug: str, new_content: str) -> tuple[str, str]:
     # and the operator gets a clear error about the half-state.
     _mirror_hosts_to_pipelock(slug, _hosts_in_routes(new_content))
 
-    # PRD 0018 chunk 3 + security item (c): routes.yaml is bind-
-    # mounted into the egress container, so the write target is the
-    # host path the sidecar reads through the mount. POSIX
-    # rename-onto-self is atomic on the same filesystem, so a sidecar
-    # SIGHUP racing the apply can never observe a half-written file —
-    # it sees either the old bytes or the new ones.
+    # routes.yaml is bind-mounted into the egress container as a
+    # SINGLE FILE. Docker single-file bind mounts pin the source
+    # inode at mount time; write-temp-then-rename swaps the inode
+    # on the host, which leaves the container's mount pointing at
+    # the now-orphaned old inode (so the SIGHUP'd reload re-reads
+    # unchanged content). Write in-place instead. Lose file-level
+    # atomicity, but the apply path issues SIGHUP only AFTER the
+    # write returns, and the addon's `load_routes` raises
+    # `ValueError` on a partial read and keeps the previous
+    # in-memory routes — so a SIGHUP that hypothetically raced an
+    # in-flight write is non-disruptive.
     target = _egress_routes_host_path(slug)
     target.parent.mkdir(parents=True, exist_ok=True)
-    fd, tmp_path_str = tempfile.mkstemp(
-        prefix=".egress_routes.", suffix=".yaml.tmp", dir=str(target.parent),
+    target.write_text(new_content)
+    # mitmproxy in the container reads through the bind mount as
+    # uid 1000; the host file has to be world-readable for that
+    # read to succeed (parent dir at 0o700 still restricts who
+    # can reach the file on the host). Routes content is not
+    # secret — tokens live in the container's environ — so 0o644
+    # is the right trade-off.
+    target.chmod(0o644)
+    sig = subprocess.run(
+        ["docker", "kill", "--signal", "HUP", container],
+        capture_output=True, text=True, check=False,
     )
-    tmp_path = Path(tmp_path_str)
-    try:
-        with os.fdopen(fd, "w") as f:
-            f.write(new_content)
-        # mitmproxy in the container reads through the bind mount as
-        # uid 1000; the host file has to be world-readable for that
-        # read to succeed (parent dir at 0o700 still restricts who
-        # can reach the file on the host). Routes content is not
-        # secret — tokens live in the container's environ — so 0o644
-        # is the right trade-off.
-        os.chmod(tmp_path, 0o644)
-        os.replace(tmp_path, target)
-        sig = subprocess.run(
-            ["docker", "kill", "--signal", "HUP", container],
-            capture_output=True, text=True, check=False,
+    if sig.returncode != 0:
+        raise EgressApplyError(
+            f"failed to SIGHUP {container}: "
+            f"{(sig.stderr or '').strip()}"
         )
-        if sig.returncode != 0:
-            raise EgressApplyError(
-                f"failed to SIGHUP {container}: "
-                f"{(sig.stderr or '').strip()}"
-            )
-    except BaseException:
-        # On any failure pre-rename, drop the tmp file. Post-rename
-        # there's nothing to clean up — `os.replace` is atomic so
-        # either the new file is in place or the old one still is.
-        try:
-            tmp_path.unlink()
-        except OSError:
-            pass
-        raise
 
     return before, new_content
 
diff --git a/claude_bottle/backend/docker/pipelock_apply.py b/claude_bottle/backend/docker/pipelock_apply.py
index fe2c4d0..95487cb 100644
--- a/claude_bottle/backend/docker/pipelock_apply.py
+++ b/claude_bottle/backend/docker/pipelock_apply.py
@@ -155,41 +155,28 @@ def apply_allowlist_change(
     cfg["api_allowlist"] = new_hosts
     rendered = pipelock_render_yaml(cfg)
 
-    # PRD 0018 chunk 3 + security item (c): pipelock.yaml is
-    # bind-mounted into the container, so the write target is the
-    # host path the sidecar reads. POSIX rename is atomic on the
-    # same filesystem, which matters less here than for the
-    # SIGHUP-reload egress case (pipelock fully restarts and
-    # re-reads on boot), but the pattern is uniform across both
-    # apply paths.
+    # pipelock.yaml is bind-mounted into the container as a SINGLE
+    # FILE — same Docker single-file inode issue as egress_apply:
+    # write-temp-then-rename swaps the host inode and leaves the
+    # container's mount pointing at the orphaned old one. Write
+    # in-place. `docker restart` below picks up the new content
+    # (and pipelock has no in-process reload anyway, so the
+    # restart is what makes it live regardless of write atomicity).
     target = _pipelock_yaml_host_path(slug)
     target.parent.mkdir(parents=True, exist_ok=True)
-    fd, tmp_path_str = tempfile.mkstemp(
-        prefix=".pipelock.", suffix=".yaml.tmp", dir=str(target.parent),
+    target.write_text(rendered)
+    # pipelock runs as root in its distroless image — any mode is
+    # fine — but 0o600 matches what prepare wrote.
+    target.chmod(0o600)
+    restart = subprocess.run(
+        ["docker", "restart", container],
+        capture_output=True, text=True, check=False,
     )
-    tmp_path = Path(tmp_path_str)
-    try:
-        with os.fdopen(fd, "w") as f:
-            f.write(rendered)
-        # pipelock runs as root in its distroless image — any mode
-        # is fine — but 0o600 matches what prepare wrote.
-        os.chmod(tmp_path, 0o600)
-        os.replace(tmp_path, target)
-        restart = subprocess.run(
-            ["docker", "restart", container],
-            capture_output=True, text=True, check=False,
+    if restart.returncode != 0:
+        raise PipelockApplyError(
+            f"failed to restart {container}: "
+            f"{(restart.stderr or '').strip()}"
         )
-        if restart.returncode != 0:
-            raise PipelockApplyError(
-                f"failed to restart {container}: "
-                f"{(restart.stderr or '').strip()}"
-            )
-    except BaseException:
-        try:
-            tmp_path.unlink()
-        except OSError:
-            pass
-        raise
 
     return before, after