chore(smolmachines): instrument pty_resize wrapper for crash diagnosis
User reports launch crashing only inside tmux (works outside). The wrapper itself runs fine in standalone tmux repros, so the break is in some interaction we can't see — curses eats stderr, default tmux remain-on-exit is off, and the pane closes before the operator can read anything.

Add an always-on log at ~/.claude-bottle/pty_resize.log, with every line prefixed by the writer's pid:
- start record: argv, cwd, PATH, TMUX status
- sync record: window size observed
- child pid + exit rc
- any KeyboardInterrupt forwarding
- Popen failure traceback if it dies

Append-mode, small overhead, easy to grep + share. Removable (along with the wrapper itself) once smolvm forwards SIGWINCH natively.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -35,6 +35,7 @@ follow-up tracked separately)."""
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import datetime
|
||||||
import fcntl
|
import fcntl
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
@@ -42,6 +43,25 @@ import struct
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import termios
|
import termios
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
|
||||||
|
# Debug log so we can diagnose tmux-pane crashes that happen in
|
||||||
|
# pane respawn — the dashboard's curses surface eats stderr, and
|
||||||
|
# `tmux respawn-pane`'s default remain-on-exit is off. Always-on
|
||||||
|
# (small overhead) so a user reporting a crash can just share the
|
||||||
|
# file. Append-mode, per-pid line prefix.
|
||||||
|
_DEBUG_LOG_PATH = os.path.expanduser("~/.claude-bottle/pty_resize.log")
|
||||||
|
|
||||||
|
|
||||||
|
def _log(msg: str) -> None:
    """Append one timestamped, pid-prefixed line to the debug log.

    Best-effort by design: logging must never take the wrapper down, so
    filesystem errors (unwritable home directory, full disk, ...) are
    swallowed silently.

    Args:
        msg: Text to record. It may contain non-ASCII characters (one
            caller logs a message containing "→", and ``repr()`` of
            argv/cwd/PATH keeps printable non-ASCII as-is).
    """
    try:
        os.makedirs(os.path.dirname(_DEBUG_LOG_PATH), exist_ok=True)
        ts = datetime.datetime.now().isoformat(timespec="milliseconds")
        # Pin the encoding instead of inheriting the locale's. Under a
        # non-UTF-8 locale a non-ASCII message would otherwise raise
        # UnicodeEncodeError (a ValueError, not an OSError), escape this
        # function, and crash the wrapper from inside its own diagnostics
        # -- including from the SIGWINCH handler. backslashreplace keeps
        # anything UTF-8 cannot represent (lone surrogates) readable
        # rather than raising.
        with open(
            _DEBUG_LOG_PATH, "a", encoding="utf-8", errors="backslashreplace"
        ) as f:
            f.write(f"[{ts} pid={os.getpid()}] {msg}\n")
    except OSError:
        # Deliberate silent swallow: a broken log must not break the launch.
        pass
|
||||||
|
|
||||||
|
|
||||||
def _read_winsize() -> tuple[int, int] | None:
|
def _read_winsize() -> tuple[int, int] | None:
|
||||||
@@ -90,17 +110,23 @@ def main(argv: list[str]) -> int:
|
|||||||
We don't use argparse — the `--` separator is the contract and
|
We don't use argparse — the `--` separator is the contract and
|
||||||
everything past it is forwarded verbatim. Keeps the wrapper
|
everything past it is forwarded verbatim. Keeps the wrapper
|
||||||
transparent for callers building argv programmatically."""
|
transparent for callers building argv programmatically."""
|
||||||
|
_log(f"start argv={argv!r} cwd={os.getcwd()!r} "
|
||||||
|
f"PATH={os.environ.get('PATH','')!r} "
|
||||||
|
f"TMUX={os.environ.get('TMUX','<unset>')!r}")
|
||||||
|
|
||||||
if len(argv) < 3 or argv[1] != "--":
|
if len(argv) < 3 or argv[1] != "--":
|
||||||
sys.stderr.write(
|
sys.stderr.write(
|
||||||
"usage: python -m claude_bottle.backend.smolmachines.pty_resize "
|
"usage: python -m claude_bottle.backend.smolmachines.pty_resize "
|
||||||
"<machine> -- <smolvm-argv...>\n"
|
"<machine> -- <smolvm-argv...>\n"
|
||||||
)
|
)
|
||||||
|
_log("exit=2 (bad argv)")
|
||||||
return 2
|
return 2
|
||||||
machine = argv[0]
|
machine = argv[0]
|
||||||
inner = argv[2:]
|
inner = argv[2:]
|
||||||
|
|
||||||
def sync(*_args) -> None:
|
def sync(*_args) -> None:
|
||||||
size = _read_winsize()
|
size = _read_winsize()
|
||||||
|
_log(f"sync size={size!r}")
|
||||||
if size is None:
|
if size is None:
|
||||||
return
|
return
|
||||||
_push_size(machine, *size)
|
_push_size(machine, *size)
|
||||||
@@ -110,15 +136,23 @@ def main(argv: list[str]) -> int:
|
|||||||
# is caught even if it races the initial sync.
|
# is caught even if it races the initial sync.
|
||||||
signal.signal(signal.SIGWINCH, sync)
|
signal.signal(signal.SIGWINCH, sync)
|
||||||
|
|
||||||
proc = subprocess.Popen(inner)
|
try:
|
||||||
|
proc = subprocess.Popen(inner)
|
||||||
|
except BaseException:
|
||||||
|
_log("Popen failed:\n" + traceback.format_exc())
|
||||||
|
raise
|
||||||
|
_log(f"child pid={proc.pid}")
|
||||||
sync() # push initial size — VM PTY starts at 0 0.
|
sync() # push initial size — VM PTY starts at 0 0.
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
return proc.wait()
|
rc = proc.wait()
|
||||||
|
_log(f"child exit rc={rc}")
|
||||||
|
return rc
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
# Ctrl-C in the operator's terminal → forward to the
|
# Ctrl-C in the operator's terminal → forward to the
|
||||||
# child once, then keep waiting. claude handles its
|
# child once, then keep waiting. claude handles its
|
||||||
# own interrupt cleanup.
|
# own interrupt cleanup.
|
||||||
|
_log("KeyboardInterrupt → forward SIGINT to child")
|
||||||
proc.send_signal(signal.SIGINT)
|
proc.send_signal(signal.SIGINT)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user