Files
bot-bottle/tests/unit/test_smolmachines_pty_resize.py
T
didericis-claude aa5aa1f031
test / unit (pull_request) Successful in 26s
test / integration (pull_request) Successful in 41s
test / unit (push) Successful in 27s
test / integration (push) Successful in 45s
fix(smolmachines): defer pty_resize startup sync to dodge libkrun's bringup race
The b9853ae stdin=DEVNULL fix wasn't sufficient. End-to-end
testing against a live VM in tmux revealed a second crash path:
libkrun spits "load \`config.json\`: parse error: trailing
garbage { \"ociVersion\": \"1.0.2\", ... }" and the main exec
dies (rc=1 or SIGKILL/rc=137, depending on race scheduling).

Root cause: each `smolvm machine exec` writes a per-invocation
OCI config.json to the same smolvm state dir during its bringup.
The wrapper's startup sync() fires within 1ms of Popen-ing the
main exec — both invocations write config.json concurrently,
libkrun loads one mid-write, and gets garbage. Trivial inner
commands (`sh -c "echo hi"`) finished before the overlap
mattered, masking the race in earlier tests. claude's slower
startup hits the race every time, and only inside tmux because
the outside-tmux foreground-handoff path takes a different
bringup sequence that happens to dodge the window.

Fix: schedule the initial sync on a 2-second `threading.Timer`
instead of calling it synchronously. By 2s the main exec is
past its bringup window, so the side-channel's config.json
write doesn't collide. Daemon thread so the timer doesn't
block exit when the child finishes quickly.

Trade-off: the in-VM PTY uses smolvm's default size for the
first ~2s, then snaps to the host pane size when the timer
fires. Verified end-to-end against a live VM in tmux: claude
renders at the default size during bringup, then redraws at
full pane width once the deferred sync lands. Operator-driven
resizes (SIGWINCH) still bridge in real time via the
already-installed signal handler.

Also drop the diagnostic log added in 9c83ea6 — we have the
fix.

Regression test:
`TestStartupSyncDeferred.test_main_schedules_timer_does_not_
call_sync_synchronously` mocks Popen + Timer + _push_size and
asserts `main()` schedules the timer with the documented
delay constant and never invokes _push_size synchronously.
Catches a "let's just inline the sync() call" regression
immediately.

638 unit tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 20:55:00 -04:00

165 lines
6.4 KiB
Python

"""Unit: smolmachines pty_resize bridge (issue #82).
Locks down the parts of the wrapper we can test without spawning
real children or signalling — argument parsing, the side-channel
`smolvm machine exec` argv shape, and TTY-resolution fallback
across stdin/stdout/stderr.
"""
from __future__ import annotations
import io
import unittest
import unittest.mock
from unittest.mock import patch
from claude_bottle.backend.smolmachines import pty_resize
class TestPushSize(unittest.TestCase):
def test_emits_for_loop_over_all_pts_devices(self):
# The shell `for f in /dev/pts/*` handles multiple
# interactive sessions in the same VM (rare but cheap).
# Per-PTY `stty -F ... 2>/dev/null` swallows EBADF when a
# session has already exited.
with patch.object(pty_resize.subprocess, "run") as run:
pty_resize._push_size("claude-bottle-m", 50, 200)
argv = run.call_args.args[0]
self.assertEqual(
["smolvm", "machine", "exec", "--name",
"claude-bottle-m", "--", "sh", "-c"],
argv[:8],
)
# cols / rows land in the order stty wants them.
self.assertIn("cols 200", argv[8])
self.assertIn("rows 50", argv[8])
self.assertIn("for f in /dev/pts/*", argv[8])
def test_side_channel_uses_devnull_stdin(self):
# Load-bearing regression: under tmux, inheriting the
# pane PTY as the side-channel's stdin makes smolvm crash
# within ~100ms (concurrent smolvm processes sharing the
# PTY's FG-PG / input plumbing). DEVNULL stdin sidesteps
# the interaction.
with patch.object(pty_resize.subprocess, "run") as run:
pty_resize._push_size("claude-bottle-m", 24, 80)
self.assertEqual(
pty_resize.subprocess.DEVNULL,
run.call_args.kwargs.get("stdin"),
)
def test_swallows_subprocess_failures(self):
# `check=False` + DEVNULL streams: a side-channel failure
# mustn't break the operator's session.
with patch.object(
pty_resize.subprocess, "run",
side_effect=OSError("boom"),
):
with self.assertRaises(OSError):
pty_resize._push_size("m", 24, 80)
# The wrapper-level `sync()` is what swallows; `_push_size`
# itself raises so the test above documents that. The
# signal-handler-side `sync` in main wraps in try/except
# via the `if size is None: return` guard for the
# no-TTY case (no separate try needed because subprocess
# already has check=False; only fcntl.ioctl raising would
# surface, and _read_winsize handles that).
class TestReadWinsize(unittest.TestCase):
def test_returns_none_when_no_tty(self):
# Patch ioctl to always OSError — simulates the case where
# none of stdin/stdout/stderr is a TTY (e.g., tests, piped
# automation).
with patch.object(
pty_resize.fcntl, "ioctl",
side_effect=OSError("ENOTTY"),
):
self.assertIsNone(pty_resize._read_winsize())
def test_returns_first_tty_size(self):
# First fd that responds with a non-zero size wins —
# matches the "different surfaces give different TTYs"
# invariant noted in the module docstring.
import struct
calls: list[int] = []
def fake_ioctl(fd, req, buf):
calls.append(fd)
if fd == 0:
raise OSError("stdin not a tty")
return struct.pack("hhhh", 42, 137, 0, 0)
with patch.object(pty_resize.fcntl, "ioctl", side_effect=fake_ioctl):
self.assertEqual((42, 137), pty_resize._read_winsize())
def test_skips_zero_sizes(self):
# A TTY that reports `0 0` (the smolvm-allocated PTY's
# initial state, ironically) shouldn't be used as the
# source of truth — keep probing fallback fds.
import struct
responses = iter([
struct.pack("hhhh", 0, 0, 0, 0), # stdin: zero
struct.pack("hhhh", 24, 80, 0, 0), # stdout: real
])
def fake_ioctl(fd, req, buf):
return next(responses)
with patch.object(pty_resize.fcntl, "ioctl", side_effect=fake_ioctl):
self.assertEqual((24, 80), pty_resize._read_winsize())
class TestMainArgvParsing(unittest.TestCase):
def test_missing_separator_returns_error_exit_code(self):
# No `--` between machine name and inner argv.
with patch.object(pty_resize.sys, "stderr", new=io.StringIO()) as err:
rc = pty_resize.main(["claude-bottle-m", "smolvm", "machine"])
self.assertEqual(2, rc)
self.assertIn("usage:", err.getvalue())
def test_too_few_args_returns_error_exit_code(self):
with patch.object(pty_resize.sys, "stderr", new=io.StringIO()):
self.assertEqual(2, pty_resize.main([]))
self.assertEqual(2, pty_resize.main(["m"]))
self.assertEqual(2, pty_resize.main(["m", "--"]))
class TestStartupSyncDeferred(unittest.TestCase):
"""Regression: the initial sync MUST be deferred (timer), not
called synchronously between Popen + wait. Calling it
immediately races libkrun's per-exec OCI config write during
the main exec's bringup and crashes the child (rc=137 or
'parse error: trailing garbage')."""
def test_main_schedules_timer_does_not_call_sync_synchronously(self):
# Fake Popen + wait so main returns immediately. Patch
# Timer to record args without spawning a real thread.
# _push_size patched so any rogue synchronous call would
# be observable.
fake_proc = unittest.mock.MagicMock()
fake_proc.wait.return_value = 0
with patch.object(
pty_resize.subprocess, "Popen", return_value=fake_proc,
), patch.object(
pty_resize.threading, "Timer",
) as timer_cls, patch.object(
pty_resize, "_push_size",
) as push:
rc = pty_resize.main(["machine-name", "--", "echo", "hi"])
self.assertEqual(0, rc)
# Timer scheduled with the documented delay constant.
timer_cls.assert_called_once()
delay, callback = timer_cls.call_args.args
self.assertEqual(pty_resize._STARTUP_SYNC_DELAY_SEC, delay)
# _push_size never called synchronously — the only path to
# it is via the (mocked) timer's callback firing.
push.assert_not_called()
if __name__ == "__main__":
unittest.main()