Compare commits

..

2 Commits

Author SHA1 Message Date
didericis-claude df469b2f47 docs: add role and git.fetch to egress route fields table
Both fields were missing from the reference table added in the preceding
commit — `role` is visible in examples/bottles/claude.md and `git.fetch`
is documented in PRD 0052 but neither appeared in the README table.
2026-06-22 18:31:32 +00:00
didericis d1d9e7a105 docs: document egress matches, dlp fields, and detector defaults
lint / lint (push) Successful in 1m32s
2026-06-19 21:58:20 -04:00
6 changed files with 79 additions and 158 deletions
+26 -2
View File
@@ -14,7 +14,7 @@
## Features
- **Per-bottle egress allowlist** — TLS-bumped HTTP/HTTPS chokepoint with a per-manifest host allowlist and request-body DLP scanner; DoH and arbitrary hosts blocked by default.
- **Per-bottle egress allowlist** — TLS-bumped HTTP/HTTPS chokepoint with a per-manifest host allowlist; per-route path/method/header `matches` filtering; outbound DLP scanning for known tokens and secrets, inbound DLP scanning for prompt-injection attempts; DoH and arbitrary hosts blocked by default.
- **Tokens the agent never sees** — host secrets live in a sidecar; the agent dials `http://sidecar:9099/<path>` and the proxy strips inbound `Authorization` and injects the real token before forwarding. `printenv` in the agent shows proxy URLs only.
- **Gitleaks-scanned push (git-gate)** — `bottle.git` remotes route through a per-bottle `git daemon` that gitleaks-scans incoming refs pre-receive and forwards clean refs upstream over SSH. The agent never holds the upstream credential.
- **Manifest-scoped skills + secrets** — each bottle declares its skills, env, git identity, remotes, and egress routes; unknown keys die at load.
@@ -106,8 +106,15 @@ egress:
routes:
- host: gitea.dideric.is
auth:
scheme: token
scheme: token # Bearer | token
token_ref: BOT_BOTTLE_GITEA_TOKEN
matches: # optional — restrict to specific paths/methods/headers
- paths:
- {type: prefix, value: /api/v1/}
methods: [GET, POST, PATCH, DELETE]
dlp: # optional — per-route detector overrides (default: all on)
outbound_detectors: [token_patterns, known_secrets]
inbound_detectors: false # disable response scanning for this host
---
The `gitea-dev` bottle. Provider auth via the inherited Claude route;
@@ -126,6 +133,23 @@ skills:
You help maintain Gitea-hosted projects.
````
**Egress route fields:**
| Field | Required | Description |
|---|---|---|
| `host` | yes | Hostname to allowlist. One entry per host. |
| `role` | no | Provider-specific role string (e.g. `claude_code_oauth`). Wires built-in auth flows; set by provider templates, not manually. |
| `auth.scheme` | when `auth` present | `Bearer` or `token`. Injected by the proxy; the agent never sees the value. |
| `auth.token_ref` | when `auth` present | Env-var name holding the secret on the host. |
| `matches` | no | Array of `{paths, methods, headers}` filters. A request must match at least one entry (if any are given) to be forwarded. |
| `matches[].paths` | no | Array of `{type, value}`. `type` is `prefix` (default), `exact`, or `regex`. |
| `matches[].methods` | no | Array of HTTP method strings, e.g. `[GET, POST]`. |
| `matches[].headers` | no | Array of `{name, value, type}`. `type` is `exact` (default) or `regex`. |
| `dlp` | no | Per-route DLP overrides. Omit to use defaults (all detectors on). |
| `dlp.outbound_detectors` | no | `false` disables outbound scanning; list restricts to named detectors (`token_patterns`, `known_secrets`). |
| `dlp.inbound_detectors` | no | `false` disables inbound scanning; list restricts to named detectors (`naive_injection_detection`). |
| `git.fetch` | no | `true` permits smart HTTP clone/fetch (`git-upload-pack`) for this host. Push (`git-receive-pack`) remains blocked. |
More examples in `examples/`. Full design lives under `docs/prds/`; the trust-boundary rationale is in `docs/prds/0011-per-file-md-manifest.md`.
## Trademarks
+17 -5
View File
@@ -45,7 +45,7 @@ from ..agent_provider import AgentProvisionPlan, get_provider, build_agent_provi
from ..egress import EgressPlan
from ..git_gate import GitGatePlan
from ..log import die, info
from ..manifest import Manifest
from ..manifest import ManifestGitEntry, Manifest
from ..supervise import SupervisePlan
from ..util import expand_tilde
from ..env import resolve_env, ResolvedEnv
@@ -356,14 +356,16 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]):
pass
def _validate(self, spec: BottleSpec) -> None:
"""Cross-backend pre-launch checks. Confirms the agent exists
and the named skills are present on the host. Subclasses with
additional preconditions should override and call
`super()._validate(spec)` first."""
"""Cross-backend pre-launch checks. Confirms the agent exists,
the named skills are present on the host, and every git
IdentityFile resolves. Subclasses with additional preconditions
should override and call `super()._validate(spec)` first."""
manifest = spec.manifest
manifest.require_agent(spec.agent_name)
agent = manifest.agents[spec.agent_name]
bottle = manifest.bottle_for(spec.agent_name)
self._validate_skills(agent.skills)
self._validate_git_entries(bottle.git)
self._validate_agent_provider_dockerfile(spec)
def _validate_skills(self, skills: Sequence[str]) -> None:
@@ -378,6 +380,16 @@ class BottleBackend(ABC, Generic[PlanT, CleanupT]):
f"Create it under ~/.claude/skills/, then re-run."
)
def _validate_git_entries(self, entries: Sequence[ManifestGitEntry]) -> None:
"""Each entry's IdentityFile must exist on the host (after
expanding leading ~) — the git-gate copies it in at start time
to authenticate the upstream push (PRD 0008). Shape is already
enforced by Manifest validation; this only checks presence."""
for entry in entries:
key = expand_tilde(entry.IdentityFile)
if not os.path.isfile(key):
die(f"git upstream key file not found for '{entry.Name}': {key}")
def _validate_agent_provider_dockerfile(self, spec: BottleSpec) -> None:
bottle = spec.manifest.bottle_for(spec.agent_name)
dockerfile = bottle.agent_provider.dockerfile
+18 -66
View File
@@ -5,20 +5,16 @@ from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .manifest import ManifestBottle
from .manifest import ManifestBottle, ManifestGitEntry
from .manifest_egress import ManifestEgressConfig
def resolve_bottles(raws: dict[str, dict[str, object]]) -> dict[str, ManifestBottle]:
"""Apply `extends:` chains and return resolved ManifestBottle objects."""
cache: dict[str, ManifestBottle] = {}
# Per-bottle effective git-gate.repos, as raw dicts keyed by repo name.
# Threaded alongside `cache` so a child can field-merge against its
# parent's repos without reconstructing them from parsed entries.
repos_cache: dict[str, dict[str, object]] = {}
for name in raws:
if name not in cache:
_resolve_one_bottle(name, raws, cache, repos_cache, ())
_resolve_one_bottle(name, raws, cache, ())
return cache
@@ -26,7 +22,6 @@ def _resolve_one_bottle(
name: str,
raws: dict[str, dict[str, object]],
cache: dict[str, ManifestBottle],
repos_cache: dict[str, dict[str, object]],
seen: tuple[str, ...],
) -> ManifestBottle:
from .manifest import ManifestBottle, ManifestError
@@ -46,7 +41,6 @@ def _resolve_one_bottle(
if parent_name_raw is None:
bottle = ManifestBottle.from_dict(name, child_raw)
cache[name] = bottle
repos_cache[name] = _resolve_repos_raw({}, child_raw)
return bottle
if not isinstance(parent_name_raw, str):
@@ -66,33 +60,20 @@ def _resolve_one_bottle(
f"bottle '{name}' extends '{parent_name}' which is not "
f"defined. Available bottles: {avail}"
)
parent = _resolve_one_bottle(
parent_name, raws, cache, repos_cache, seen + (name,)
)
merged_repos_raw = _resolve_repos_raw(repos_cache[parent_name], child_raw)
bottle = _merge_bottles(parent, child_raw, merged_repos_raw, name)
parent = _resolve_one_bottle(parent_name, raws, cache, seen + (name,))
bottle = _merge_bottles(parent, child_raw, name)
cache[name] = bottle
repos_cache[name] = merged_repos_raw
return bottle
def _merge_bottles(
parent: ManifestBottle,
child_raw: dict[str, object],
merged_repos_raw: dict[str, object],
name: str,
) -> ManifestBottle:
"""Apply PRD 0025 merge rules."""
from .manifest import ManifestBottle, ManifestGitUser
from .manifest_egress import validate_egress_routes
from .manifest_util import as_json_object
# git-gate.repos: when the child declares repos, inject the already
# name-merged repo set (computed by _resolve_repos_raw) so the child
# parses with the full inherited+overridden list (issue #237).
if _child_declares_git_gate_repos(child_raw):
git_raw = as_json_object(child_raw.get("git-gate", {}), "child git-gate")
child_raw = {**child_raw, "git-gate": {**git_raw, "repos": merged_repos_raw}}
# Parse the child's declared fields into a ManifestBottle (with the
# usual defaults for anything missing). Validation runs the same
@@ -111,11 +92,11 @@ def _merge_bottles(
email=child.git_user.email or parent.git_user.email,
)
# git-gate.repos: when declared, child.git already holds the merged
# set (an explicit empty dict clears parent, leaving child.git empty).
# When omitted, the parent's entries are inherited verbatim.
# git-gate.repos: missing means inherit; an explicit empty object
# clears; otherwise parent and child merge by UpstreamHost with
# child entries replacing duplicate hosts.
if _child_declares_git_gate_repos(child_raw):
merged_git = child.git
merged_git = _merge_git_remotes(parent.git, child.git) if child.git else ()
else:
merged_git = parent.git
@@ -149,45 +130,6 @@ def _merge_bottles(
)
def _resolve_repos_raw(
parent_repos: dict[str, object],
child_raw: dict[str, object],
) -> dict[str, object]:
"""Compute a bottle's effective git-gate.repos as raw dicts.
Repos are keyed by name. When the child omits git-gate.repos it
inherits the parent's set verbatim; an explicit empty dict clears it.
Otherwise parent and child unite by name, with same-name entries
field-merged (parent fields are defaults, child fields win)."""
from .manifest_util import as_json_object
if not _child_declares_git_gate_repos(child_raw):
return parent_repos
child_repos = _declared_repos_raw(child_raw)
if not child_repos:
return {}
# Parent entries keep their order; child-only names are appended.
names = list(parent_repos) + [n for n in child_repos if n not in parent_repos]
return {
name: {
**as_json_object(parent_repos.get(name, {}), "parent git-gate repo"),
**as_json_object(child_repos.get(name, {}), "child git-gate repo"),
}
for name in names
}
def _declared_repos_raw(child_raw: dict[str, object]) -> dict[str, object]:
"""Return the child's explicitly declared git-gate.repos as raw dicts,
or an empty dict when none are declared."""
from .manifest_util import as_json_object
if not _child_declares_git_gate_repos(child_raw):
return {}
git_raw = as_json_object(child_raw.get("git-gate", {}), "child git-gate")
return as_json_object(git_raw.get("repos", {}), "child git-gate.repos")
def _child_declares_git_gate_repos(child_raw: dict[str, object]) -> bool:
from .manifest_util import as_json_object
@@ -198,6 +140,16 @@ def _child_declares_git_gate_repos(child_raw: dict[str, object]) -> bool:
return "repos" in git_obj
def _merge_git_remotes(
parent: tuple[ManifestGitEntry, ...],
child: tuple[ManifestGitEntry, ...],
) -> tuple[ManifestGitEntry, ...]:
by_host = {entry.UpstreamHost: entry for entry in parent}
for entry in child:
by_host[entry.UpstreamHost] = entry
return tuple(by_host.values())
def _merge_egress(
parent: ManifestEgressConfig,
child: ManifestEgressConfig,
+6 -1
View File
@@ -5,10 +5,15 @@ agent_provider:
egress:
routes:
- host: api.anthropic.com
role: claude_code_oauth
role: claude_code_oauth # wires Claude Code OAuth; do not change
auth:
scheme: Bearer
token_ref: BOT_BOTTLE_CLAUDE_OAUTH_TOKEN
# dlp is omitted → all detectors on by default (token_patterns,
# known_secrets outbound; naive_injection_detection inbound).
# To disable inbound scanning for this route:
# dlp:
# inbound_detectors: false
---
Common Claude provider boundary. Drop this file into
+4 -3
View File
@@ -92,9 +92,10 @@ class TestSandboxEscape(unittest.TestCase):
"on PATH: curl -sSL https://smolmachines.com/install.sh | sh"
)
# Throwaway "identity file" for the git-gate's `identity` field.
# It need not be a real SSH key: test 5 reaches gitleaks before
# any SSH attempt anyway.
# Throwaway "identity file" so the manifest's _validate_git_entries
# passes (it only checks `os.path.isfile`, not that the content is
# a real SSH key). Test 5 reaches gitleaks before any SSH attempt
# anyway.
fd, kp = tempfile.mkstemp(prefix="sandbox-test-key.")
os.close(fd)
cls._key_path = Path(kp)
+8 -81
View File
@@ -113,8 +113,8 @@ class TestExtendsEnvMerge(unittest.TestCase):
class TestExtendsGitMerge(unittest.TestCase):
"""git-gate.user overlays by field; git-gate.repos merges by name,
with same-name child entries merging field-by-field (child wins)."""
"""git-gate.user overlays by field; git-gate.repos merges by upstream
host, with child entries replacing duplicate hosts."""
_GIT_ENTRY_A = {"url": "ssh://git@host-a/a.git", "key": {"provider": "static", "path": "/dev/null"}}
_GIT_ENTRY_B = {"url": "ssh://git@host-b/b.git", "key": {"provider": "static", "path": "/dev/null"}}
@@ -130,21 +130,19 @@ class TestExtendsGitMerge(unittest.TestCase):
names = [e.Name for e in m.bottles["child"].git]
self.assertEqual(["a", "b"], names)
def test_child_git_repo_different_name_same_host_coexists(self):
# Repos are keyed by Name, not UpstreamHost: two repos with
# different names on the same host both survive the merge.
same_host_b = {"url": "ssh://git@host-a/b.git", "key": {"provider": "static", "path": "/dev/null"}}
def test_child_git_repo_replaces_same_host(self):
replacement = {"url": "ssh://git@host-a/replacement.git", "key": {"provider": "static", "path": "/dev/null"}}
m = _build(
base={"git-gate": {"repos": {"a": self._GIT_ENTRY_A}}},
child={
"extends": "base",
"git-gate": {"repos": {"a2": same_host_b}},
"git-gate": {"repos": {"a2": replacement}},
},
)
entries = m.bottles["child"].git
self.assertEqual(2, len(entries))
names = {e.Name for e in entries}
self.assertEqual({"a", "a2"}, names)
self.assertEqual(1, len(entries))
self.assertEqual("a2", entries[0].Name)
self.assertEqual("replacement.git", entries[0].UpstreamPath)
def test_child_omits_git_gate_inherits_full_list(self):
m = _build(
@@ -166,77 +164,6 @@ class TestExtendsGitMerge(unittest.TestCase):
)
self.assertEqual((), m.bottles["child"].git)
def test_child_same_name_repo_merges_key_field(self):
# Issue #237: child repo with same name as parent should merge
# field-by-field. Child overrides only `key`; parent's url and
# host_key are preserved.
parent_entry = {
"url": "ssh://git@host-a/repo.git",
"host_key": "ecdsa-sha2-nistp256 AAAA",
"key": {"provider": "static", "path": "/keys/id_rsa"},
}
m = _build(
base={"git-gate": {"repos": {"repo": parent_entry}}},
child={
"extends": "base",
"git-gate": {"repos": {"repo": {
"key": {"provider": "gitea", "forge_token_env": "GITEA_TOKEN"},
}}},
},
)
entries = m.bottles["child"].git
self.assertEqual(1, len(entries))
e = entries[0]
self.assertEqual("repo", e.Name)
self.assertEqual("ssh://git@host-a/repo.git", e.Upstream)
self.assertEqual("ecdsa-sha2-nistp256 AAAA", e.KnownHostKey)
self.assertEqual("gitea", e.Key.provider)
self.assertEqual("GITEA_TOKEN", e.Key.forge_token_env)
def test_child_same_name_repo_overrides_url(self):
# Child can override url on a same-name repo; other parent fields
# fall through.
parent_entry = {
"url": "ssh://git@host-a/old.git",
"key": {"provider": "static", "path": "/keys/id_rsa"},
}
m = _build(
base={"git-gate": {"repos": {"repo": parent_entry}}},
child={
"extends": "base",
"git-gate": {"repos": {"repo": {
"url": "ssh://git@host-b/new.git",
"key": {"provider": "static", "path": "/keys/id_rsa"},
}}},
},
)
entries = m.bottles["child"].git
self.assertEqual(1, len(entries))
self.assertEqual("ssh://git@host-b/new.git", entries[0].Upstream)
def test_child_same_name_plus_new_repo(self):
# Same-name repo is field-merged; a distinct new name in child
# is appended.
parent_entry = {
"url": "ssh://git@host-a/repo.git",
"key": {"provider": "static", "path": "/keys/id_rsa"},
}
m = _build(
base={"git-gate": {"repos": {"repo": parent_entry}}},
child={
"extends": "base",
"git-gate": {"repos": {
"repo": {"key": {"provider": "gitea", "forge_token_env": "TOK"}},
"other": self._GIT_ENTRY_B,
}},
},
)
child = m.bottles["child"]
names = {e.Name for e in child.git}
self.assertEqual({"repo", "other"}, names)
repo_entry = next(e for e in child.git if e.Name == "repo")
self.assertEqual("gitea", repo_entry.Key.provider)
def test_child_git_user_inherits_parent_repos(self):
m = _build(
base={"git-gate": {"repos": {"a": self._GIT_ENTRY_A}}},