diff --git a/bot_bottle/git_gate.py b/bot_bottle/git_gate.py index 59b29d1..274d121 100644 --- a/bot_bottle/git_gate.py +++ b/bot_bottle/git_gate.py @@ -29,6 +29,7 @@ backend-specific and lives on concrete subclasses (see from __future__ import annotations +import shlex from abc import ABC, abstractmethod from dataclasses import dataclass from pathlib import Path @@ -207,10 +208,7 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str: "mkdir -p /git", ] for u in upstreams: - # Single-quote args so URL/path content (containing : and /) - # passes through ash unmangled. Names came through the manifest - # validator so they don't contain a single quote. - lines.append(f"init_repo '{u.name}' '{u.upstream_url}'") + lines.append(f"init_repo {shlex.quote(u.name)} {shlex.quote(u.upstream_url)}") lines.extend([ "", "exec git daemon \\", diff --git a/bot_bottle/manifest.py b/bot_bottle/manifest.py index 89a3a87..343f979 100644 --- a/bot_bottle/manifest.py +++ b/bot_bottle/manifest.py @@ -47,6 +47,7 @@ from __future__ import annotations import ipaddress import os +import re from dataclasses import dataclass, field, replace from pathlib import Path from typing import Mapping, cast @@ -60,6 +61,12 @@ class ManifestError(Exception): """A manifest file (or the manifest tree) is invalid.""" +# Shell-safe characters for git-gate repo names. Names are embedded in +# the generated entrypoint shell script (shlex.quote is the primary +# defence; this regex is belt-and-suspenders and documents intent). +_GIT_NAME_RE = re.compile(r"^[A-Za-z0-9._-]+$") + + def _empty_str_dict() -> dict[str, str]: return {} @@ -103,6 +110,11 @@ class GitEntry: raise ManifestError( f"bottle '{bottle_name}' git-gate.repos has an empty key" ) + if not _GIT_NAME_RE.match(repo_name): + raise ManifestError( + f"bottle '{bottle_name}' git-gate.repos name {repo_name!r} is invalid; " + f"allowed characters: A-Z a-z 0-9 . _ -" + ) label = f"git-gate.repos[{repo_name!r}]" d = _as_json_object(raw, f"bottle '{bottle_name}' {label}") for k in d: diff --git a/tests/unit/test_git_gate.py b/tests/unit/test_git_gate.py index c93462c..6884c78 100644 --- a/tests/unit/test_git_gate.py +++ b/tests/unit/test_git_gate.py @@ -76,14 +76,18 @@ class TestEntrypointRender(unittest.TestCase): ) script = git_gate_render_entrypoint(ups) self.assertIn("#!/bin/sh", script) - self.assertIn( - "init_repo 'bot-bottle' " - "'ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git'", - script, + # shlex.quote leaves safe strings unquoted; verify the call tokens. + import shlex as _shlex + lines_with_init = [l for l in script.splitlines() if l.startswith("init_repo ")] + self.assertEqual(2, len(lines_with_init)) + self.assertEqual( + ["init_repo", "bot-bottle", + "ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git"], + _shlex.split(lines_with_init[0]), ) - self.assertIn( - "init_repo 'foo' 'ssh://git@github.com/didericis/foo.git'", - script, + self.assertEqual( + ["init_repo", "foo", "ssh://git@github.com/didericis/foo.git"], + _shlex.split(lines_with_init[1]), ) # Daemon line is what keeps PID 1 alive. self.assertIn("exec git daemon", script) @@ -108,6 +112,46 @@ class TestEntrypointRender(unittest.TestCase): self.assertNotIn("init_repo '", script) self.assertIn("exec git daemon", script) + def test_single_quote_in_upstream_url_is_escaped(self): + ups = (GitGateUpstream( + name="myrepo", + upstream_url="ssh://git@host/path'with'quotes.git", + upstream_host="host", + upstream_port="22", + identity_file="/key", + known_host_key="", + ),) + script = git_gate_render_entrypoint(ups) + # The raw single-quoted form would break the shell script; + # shlex.quote must produce something safe. + self.assertNotIn( + "init_repo 'myrepo' 'ssh://git@host/path'with'quotes.git'", + script, + ) + self.assertIn("init_repo", script) + self.assertIn("path", script) + + def test_space_and_semicolon_in_upstream_url_are_escaped(self): + import shlex as _shlex + raw_url = "ssh://git@host/path with spaces;evil.git" + ups = (GitGateUpstream( + name="myrepo", + upstream_url=raw_url, + upstream_host="host", + upstream_port="22", + identity_file="/key", + known_host_key="", + ),) + script = git_gate_render_entrypoint(ups) + # Skip the function-definition line "init_repo() {"; match the call. + line = next(l for l in script.splitlines() if l.startswith("init_repo ")) + # Re-parsing via shlex must recover exactly the original URL with no + # shell injection — three tokens total. + tokens = _shlex.split(line) + self.assertEqual(3, len(tokens)) + self.assertEqual("myrepo", tokens[1]) + self.assertEqual(raw_url, tokens[2]) + class TestHookRender(unittest.TestCase): def test_pre_receive_hook_has_two_phases(self): @@ -243,5 +287,103 @@ class TestPrepare(unittest.TestCase): self.assertIn("exec git daemon", content) +class TestShellEscaping(unittest.TestCase): + """Regression tests: all three render functions must produce syntactically + valid sh code even when names and upstream URLs contain shell-special + characters. Tests construct GitGateUpstream directly — bypassing manifest + name validation — so the rendering layer is exercised in isolation.""" + + _MALICIOUS_URL_CASES = [ + ("single_quote", "ssh://git@host/path'with'quotes.git"), + ("double_quote", 'ssh://git@host/path"with"quotes.git'), + ("space", "ssh://git@host/path with spaces.git"), + ("semicolon", "ssh://git@host/path;evil.git"), + ("newline", "ssh://git@host/path\nwith\nnewlines.git"), + ("backtick", "ssh://git@host/path`whoami`.git"), + ] + + _MALICIOUS_NAME_CASES = [ + ("single_quote", "repo'name"), + ("double_quote", 'repo"name'), + ("space", "repo name"), + ("semicolon", "repo;name"), + ("newline", "repo\nname"), + ("backtick", "repo`name"), + ] + + def _make_upstream(self, url: str, name: str = "myrepo") -> GitGateUpstream: + return GitGateUpstream( + name=name, + upstream_url=url, + upstream_host="host", + upstream_port="22", + identity_file="/key", + known_host_key="", + ) + + def _assert_valid_sh(self, script: str, label: str = "") -> None: + import subprocess + fd, path = tempfile.mkstemp(suffix=".sh") + try: + with os.fdopen(fd, "w") as f: + f.write(script) + result = subprocess.run( + ["sh", "-n", path], capture_output=True, text=True, + ) + self.assertEqual( + 0, result.returncode, + f"sh -n failed{(' for ' + label) if label else ''}: {result.stderr}", + ) + finally: + os.unlink(path) + + def test_hook_renders_valid_sh(self): + self._assert_valid_sh(git_gate_render_hook(), "pre-receive hook") + + def test_access_hook_renders_valid_sh(self): + self._assert_valid_sh(git_gate_render_access_hook(), "access hook") + + def test_entrypoint_with_pathological_upstream_url_renders_valid_sh(self): + for label, url in self._MALICIOUS_URL_CASES: + with self.subTest(char=label): + script = git_gate_render_entrypoint((self._make_upstream(url),)) + self._assert_valid_sh(script, label) + + def test_entrypoint_upstream_url_value_preserved_after_quoting(self): + import shlex as _shlex + for label, url in self._MALICIOUS_URL_CASES: + with self.subTest(char=label): + script = git_gate_render_entrypoint((self._make_upstream(url),)) + # The quoted form of the URL must appear verbatim in the script so + # the shell reconstructs exactly the original value at runtime. + expected = f"init_repo {_shlex.quote('myrepo')} {_shlex.quote(url)}" + self.assertIn( + expected, script, + f"{label}: expected quoted form not found in script", + ) + + def test_entrypoint_with_pathological_name_renders_valid_sh(self): + for label, name in self._MALICIOUS_NAME_CASES: + with self.subTest(char=label): + script = git_gate_render_entrypoint(( + self._make_upstream("ssh://git@github.com/foo/bar.git", name=name), + )) + self._assert_valid_sh(script, label) + + def test_entrypoint_name_value_preserved_after_quoting(self): + import shlex as _shlex + url = "ssh://git@github.com/foo/bar.git" + for label, name in self._MALICIOUS_NAME_CASES: + with self.subTest(char=label): + script = git_gate_render_entrypoint(( + self._make_upstream(url, name=name), + )) + expected = f"init_repo {_shlex.quote(name)} {_shlex.quote(url)}" + self.assertIn( + expected, script, + f"{label}: expected quoted form not found in script", + ) + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_manifest_git.py b/tests/unit/test_manifest_git.py index 5422497..8fcc22e 100644 --- a/tests/unit/test_manifest_git.py +++ b/tests/unit/test_manifest_git.py @@ -185,6 +185,51 @@ class TestGitEntryCrossValidation(unittest.TestCase): "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, }) + def test_name_with_single_quote_dies(self): + with self.assertRaises(ManifestError): + Manifest.from_json_obj(_manifest({ + "o'reilly": { + "url": "ssh://git@github.com/foo.git", + "identity": "/dev/null", + }, + })) + + def test_name_with_space_dies(self): + with self.assertRaises(ManifestError): + Manifest.from_json_obj(_manifest({ + "my repo": { + "url": "ssh://git@github.com/foo.git", + "identity": "/dev/null", + }, + })) + + def test_name_with_semicolon_dies(self): + with self.assertRaises(ManifestError): + Manifest.from_json_obj(_manifest({ + "foo;bar": { + "url": "ssh://git@github.com/foo.git", + "identity": "/dev/null", + }, + })) + + def test_name_with_dollar_dies(self): + with self.assertRaises(ManifestError): + Manifest.from_json_obj(_manifest({ + "foo$bar": { + "url": "ssh://git@github.com/foo.git", + "identity": "/dev/null", + }, + })) + + def test_valid_name_with_dots_and_hyphens_accepted(self): + m = Manifest.from_json_obj(_manifest({ + "my.repo-name_1": { + "url": "ssh://git@github.com/foo.git", + "identity": "/dev/null", + }, + })) + self.assertEqual("my.repo-name_1", m.bottles["dev"].git[0].Name) + def test_legacy_git_key_dies_with_hint(self): msg = "" try: