chore(smolmachines): instrument pty_resize wrapper for crash diagnosis
User reports launch crashing only inside tmux (works outside). The wrapper itself runs fine in standalone tmux repros, so the break is in some interaction we can't see — curses eats stderr, tmux's remain-on-exit is off by default, and the pane closes before the operator can read anything.

Add an always-on log at ~/.claude-bottle/pty_resize.log, with every line prefixed by the writing process's pid:
- start record: argv, cwd, PATH, TMUX status
- sync record: window size observed
- child pid + exit rc
- any KeyboardInterrupt forwarding
- Popen failure traceback if it dies

Append-mode, small overhead, easy to grep + share. Removable (along with the wrapper itself) once smolvm forwards SIGWINCH natively.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -35,6 +35,7 @@ follow-up tracked separately)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import fcntl
|
||||
import os
|
||||
import signal
|
||||
@@ -42,6 +43,25 @@ import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import termios
|
||||
import traceback
|
||||
|
||||
|
||||
# Debug log so we can diagnose tmux-pane crashes that happen in
|
||||
# pane respawn — the dashboard's curses surface eats stderr, and
|
||||
# `tmux respawn-pane`'s default remain-on-exit is off. Always-on
|
||||
# (small overhead) so a user reporting a crash can just share the
|
||||
# file. Append-mode, per-pid line prefix.
|
||||
_DEBUG_LOG_PATH = os.path.expanduser("~/.claude-bottle/pty_resize.log")
|
||||
|
||||
|
||||
def _log(msg: str) -> None:
    """Append one timestamped, pid-prefixed line to the debug log.

    Best-effort: the log directory is created on demand and any
    ``OSError`` (unwritable home, full disk, ...) is swallowed, because
    diagnostics must never take down the wrapper they are diagnosing.

    Args:
        msg: Free-form text to record; written verbatim after the
            ``[<iso-timestamp> pid=<pid>]`` prefix.
    """
    ts = datetime.datetime.now().isoformat(timespec="milliseconds")
    line = f"[{ts} pid={os.getpid()}] {msg}\n"
    try:
        os.makedirs(os.path.dirname(_DEBUG_LOG_PATH), exist_ok=True)
        # Pin the encoding instead of inheriting the locale's: callers
        # log non-ASCII text (e.g. "→"), and under a non-UTF-8 locale
        # the default encoder raises UnicodeEncodeError — a ValueError
        # that the OSError guard below would not catch.
        # backslashreplace additionally keeps lone surrogates (possible
        # in cwd/PATH values) from raising.
        with open(
            _DEBUG_LOG_PATH,
            "a",
            encoding="utf-8",
            errors="backslashreplace",
        ) as f:
            f.write(line)
    except OSError:
        # Deliberately silent: logging is strictly optional.
        pass
|
||||
|
||||
|
||||
def _read_winsize() -> tuple[int, int] | None:
|
||||
@@ -90,17 +110,23 @@ def main(argv: list[str]) -> int:
|
||||
We don't use argparse — the `--` separator is the contract and
|
||||
everything past it is forwarded verbatim. Keeps the wrapper
|
||||
transparent for callers building argv programmatically."""
|
||||
_log(f"start argv={argv!r} cwd={os.getcwd()!r} "
|
||||
f"PATH={os.environ.get('PATH','')!r} "
|
||||
f"TMUX={os.environ.get('TMUX','<unset>')!r}")
|
||||
|
||||
if len(argv) < 3 or argv[1] != "--":
|
||||
sys.stderr.write(
|
||||
"usage: python -m claude_bottle.backend.smolmachines.pty_resize "
|
||||
"<machine> -- <smolvm-argv...>\n"
|
||||
)
|
||||
_log("exit=2 (bad argv)")
|
||||
return 2
|
||||
machine = argv[0]
|
||||
inner = argv[2:]
|
||||
|
||||
def sync(*_args) -> None:
|
||||
size = _read_winsize()
|
||||
_log(f"sync size={size!r}")
|
||||
if size is None:
|
||||
return
|
||||
_push_size(machine, *size)
|
||||
@@ -110,15 +136,23 @@ def main(argv: list[str]) -> int:
|
||||
# is caught even if it races the initial sync.
|
||||
signal.signal(signal.SIGWINCH, sync)
|
||||
|
||||
proc = subprocess.Popen(inner)
|
||||
try:
|
||||
proc = subprocess.Popen(inner)
|
||||
except BaseException:
|
||||
_log("Popen failed:\n" + traceback.format_exc())
|
||||
raise
|
||||
_log(f"child pid={proc.pid}")
|
||||
sync() # push initial size — VM PTY starts at 0 0.
|
||||
while True:
|
||||
try:
|
||||
return proc.wait()
|
||||
rc = proc.wait()
|
||||
_log(f"child exit rc={rc}")
|
||||
return rc
|
||||
except KeyboardInterrupt:
|
||||
# Ctrl-C in the operator's terminal → forward to the
|
||||
# child once, then keep waiting. claude handles its
|
||||
# own interrupt cleanup.
|
||||
_log("KeyboardInterrupt → forward SIGINT to child")
|
||||
proc.send_signal(signal.SIGINT)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user