PRD 0038: smolmachines env contract and secret-safe injection #141

Merged
didericis merged 3 commits from prd-0038-smolmachines-env-contract into main 2026-06-02 11:15:17 -04:00
3 changed files with 247 additions and 11 deletions
+12 -11
View File
@@ -28,6 +28,7 @@ from ...backend.docker.bottle_state import (
write_metadata,
)
from ...egress import Egress
from ...env import resolve_env
from ...git_gate import GitGate
from ...pipelock import PipelockProxy
from ...supervise import Supervise
@@ -77,18 +78,18 @@ def resolve_plan(
subnet, gateway, bundle_ip = smolmachines_bundle_subnet(slug)
# Agent's env: the prepare-time view doesn't yet know the
# host loopback ports the bundle's daemons get published on
# (those come from docker AFTER `docker run` returns), so
# HTTPS_PROXY / GIT_GATE_URL / MCP_SUPERVISE_URL are
# populated in launch.py and stamped onto guest_env there.
# What we set here is the part that doesn't depend on
# bundle bringup — bottle.env literals, the empty-NO_PROXY
# safe default, and the TLS trust env trio
# (NODE_EXTRA_CA_CERTS / SSL_CERT_FILE / REQUESTS_CA_BUNDLE)
# pointing at Debian's update-ca-certificates output bundle.
# Agent's env: resolve through resolve_env() so ?prompt entries
# are prompted and ${HOST_VAR} entries are interpolated — matching
# the Docker backend's contract. Forwarded (secret/interpolated)
# values still reach the guest as -e K=V smolvm flags because
# smolvm 0.8.0 has no env-file or stdin injection path; this is
# the known argv-exposure gap documented in PRD 0038.
# HTTPS_PROXY / GIT_GATE_URL / MCP_SUPERVISE_URL are populated
# in launch.py after bundle bringup.
resolved = resolve_env(manifest, spec.agent_name)
guest_env: dict[str, str] = {
**bottle.env,
**resolved.literals,
**resolved.forwarded,
"NO_PROXY": "localhost,127.0.0.1",
"NODE_EXTRA_CA_CERTS": "/etc/ssl/certs/ca-certificates.crt",
"SSL_CERT_FILE": "/etc/ssl/certs/ca-certificates.crt",
+102
View File
@@ -0,0 +1,102 @@
# PRD 0038: smolmachines Env Contract and Secret-Safe Injection
- **Status:** Active
- **Author:** didericis-codex
- **Created:** 2026-06-02
- **Issue:** #135
## Summary
Make smolmachines env handling match Docker's contract: resolve manifest env
entries through `resolve_env()`, keep secret and interpolated values out of
host argv, and document or enforce an explicit env contract for the backend.
## Problem
`bot_bottle/backend/smolmachines/prepare.py` builds the guest env from
`bottle.env` directly, bypassing `resolve_env()`. Entries like `?prompt` and
`${HOST_VAR}` can reach the guest literally rather than being prompted or
resolved. In contrast, Docker resolves env through `resolve_env()` before
writing a mode-600 env file.
`smolmachines/smolvm.py` renders env as `-e KEY=VALUE` on `smolvm machine
create` argv, and `SmolmachinesBottle.agent_argv` / `exec` prepend
`env KEY=VALUE …` onto the `smolvm machine exec` argv. Any literal or resolved
secret value is therefore visible in the host process table.
The two backends have no shared env contract document. Divergence will silently
widen as new manifest env features are added.
## Goals / Success Criteria
- Manifest env entries are resolved through `resolve_env()` before being
injected into the smolmachines guest, matching Docker behaviour.
- No manifest env value (literal or resolved) appears on host argv during
machine creation or exec.
- Define and document an explicit smolmachines env contract covering literals,
`?prompt` secrets, and `${HOST_VAR}` interpolations.
- Unit tests cover: literal passthrough, prompted-secret resolution,
host-var interpolation, and the no-argv-leak invariant.
## Non-goals
- No changes to the Docker env path.
- No changes to manifest schema or `resolve_env()` itself.
- No changes to smolmachines networking or mount handling.
- No new runtime dependencies.
## Scope
In scope:
- `bot_bottle/backend/smolmachines/prepare.py` env resolution.
- `bot_bottle/backend/smolmachines/smolvm.py` machine-create argv.
- `bot_bottle/backend/smolmachines/bottle.py` `agent_argv` / `exec` env
injection.
- `bot_bottle/env.py` if helper changes are needed to support the smolmachines
path.
- Unit tests in `tests/unit/` covering the above.
Out of scope:
- Integration tests that start a live smolmachines VM.
- Docker backend changes.
- Dashboard or CLI changes.
## Design
Run smolmachines env through `resolve_env()` at prepare time, exactly as Docker
does. After resolution, inject env into the guest through a mechanism that does
not expose values on host argv — for example by writing a mode-600 env file
into the machine's state directory and loading it at exec time, or by passing
env through `smolvm`'s stdin if the tool supports it.
If `smolvm` provides no stdin or env-file injection path, document this as a
known limitation and at minimum move env values behind a per-invocation
tmpfile rather than inline argv.
The env contract for smolmachines should mirror Docker's:
- Literals: passed as-is after resolution.
- `?prompt` entries: prompted at prepare time; resolved value injected, never
on argv.
- `${HOST_VAR}` entries: interpolated from the operator's env at prepare time;
resolved value injected, never on argv.
## Testing Strategy
- Unit tests for `prepare.py` asserting `resolve_env()` is called and that
resolution results are used rather than raw `bottle.env` values.
- Unit tests for `smolvm.py` machine-create argv asserting no env value appears
inline.
- Unit tests for `bottle.py` exec path asserting the same argv invariant.
Run:
- `python3 -m unittest tests.unit.test_smolmachines_prepare`
- `python3 -m unittest discover -s tests/unit`
## Open Questions
- Does `smolvm machine create` support an env-file flag or stdin injection that
avoids `-e KEY=VALUE` argv?
+133
View File
@@ -0,0 +1,133 @@
"""Unit: smolmachines prepare.py env resolution (PRD 0038)."""
from __future__ import annotations
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import MagicMock, patch
from bot_bottle.agent_provider import AgentProvisionPlan
from bot_bottle.env import ResolvedEnv
class TestSmolmachinesResolveEnv(unittest.TestCase):
"""resolve_plan() must call resolve_env() and build guest_env
from the resolved values rather than from raw bottle.env."""
def _run_resolve_plan(
self,
resolved: ResolvedEnv,
*,
extra_host_env: dict[str, str] | None = None,
) -> dict[str, str]:
from bot_bottle.backend import BottleSpec
from bot_bottle.manifest import Manifest
with tempfile.TemporaryDirectory() as tmp:
stage = Path(tmp) / "stage"
stage.mkdir()
# Minimal manifest with one env literal so the spec is valid.
manifest = Manifest.from_json_obj({
"agents": {"myagent": {"bottle": "mybottle"}},
"bottles": {"mybottle": {"env": {"PLAIN": "literal-value"}}},
})
spec = BottleSpec(
manifest=manifest,
agent_name="myagent",
copy_cwd=False,
user_cwd=tmp,
identity="test-slug-00001",
)
from bot_bottle import supervise as _sup
orig_root = _sup.bot_bottle_root
_sup.bot_bottle_root = lambda: Path(tmp) / ".bot-bottle" # type: ignore[assignment]
host_env = {**os.environ, **(extra_host_env or {})}
try:
with (
patch("bot_bottle.backend.smolmachines.prepare.resolve_env",
return_value=resolved) as mock_resolve,
patch("bot_bottle.backend.smolmachines.prepare.smolmachines_preflight"),
patch("bot_bottle.backend.smolmachines.prepare.smolmachines_bundle_subnet",
return_value=("10.99.0.0/24", "10.99.0.1", "10.99.0.2")),
patch("bot_bottle.backend.smolmachines.prepare.GitGate") as mock_gg,
patch("bot_bottle.backend.smolmachines.prepare.PipelockProxy") as mock_pl,
patch("bot_bottle.backend.smolmachines.prepare.Egress") as mock_eg,
patch("bot_bottle.backend.smolmachines.prepare.Supervise"),
patch("bot_bottle.backend.smolmachines.prepare.agent_provision_plan") as mock_app,
patch("bot_bottle.backend.smolmachines.prepare.runtime_for"),
):
mock_gg.return_value.prepare.return_value = MagicMock()
mock_pl.return_value.prepare.return_value = MagicMock()
mock_eg.return_value.prepare.return_value = MagicMock()
def _make_provision(**kwargs):
return AgentProvisionPlan(
template="claude",
command="claude",
prompt_mode="append_file",
dockerfile="",
image="bot-bottle-claude:latest",
guest_env=dict(kwargs.get("guest_env") or {}),
)
mock_app.side_effect = lambda **kw: _make_provision(**kw)
from bot_bottle.backend.smolmachines.prepare import resolve_plan
plan = resolve_plan(spec, stage_dir=stage)
mock_resolve.assert_called_once_with(manifest, "myagent")
return dict(plan.guest_env)
finally:
_sup.bot_bottle_root = orig_root # type: ignore[assignment]
def test_literal_env_reaches_guest_env(self):
resolved = ResolvedEnv(
literals={"PLAIN": "hello"},
forwarded={},
)
guest_env = self._run_resolve_plan(resolved)
self.assertEqual("hello", guest_env["PLAIN"])
def test_forwarded_env_reaches_guest_env(self):
# Secrets / interpolated values land in forwarded; they must
# still reach the guest (argv exposure is the known gap).
resolved = ResolvedEnv(
literals={},
forwarded={"SECRET": "s3cr3t", "INTERP": "resolved-val"},
)
guest_env = self._run_resolve_plan(resolved)
self.assertEqual("s3cr3t", guest_env["SECRET"])
self.assertEqual("resolved-val", guest_env["INTERP"])
def test_raw_manifest_sentinel_not_in_guest_env(self):
# Before the fix, ?prompt and ${HOST} would appear verbatim.
# After the fix, resolve_env() is called so the caller sees
# the mocked resolved values (no raw sentinel survives).
resolved = ResolvedEnv(
literals={},
forwarded={"MY_SECRET": "actual-value"},
)
guest_env = self._run_resolve_plan(resolved)
for v in guest_env.values():
self.assertFalse(
v.startswith("?"),
f"raw secret sentinel survived in guest_env: {v!r}",
)
self.assertFalse(
v.startswith("${"),
f"raw interpolation sentinel survived in guest_env: {v!r}",
)
def test_tls_trust_env_always_present(self):
resolved = ResolvedEnv(literals={}, forwarded={})
guest_env = self._run_resolve_plan(resolved)
for key in ("NODE_EXTRA_CA_CERTS", "SSL_CERT_FILE", "REQUESTS_CA_BUNDLE"):
self.assertIn(key, guest_env, f"{key} missing from guest_env")
if __name__ == "__main__":
unittest.main()