test(git-gate): shell-escaping regression tests (issue #159) #167

Closed
didericis-claude wants to merge 2 commits from regression/issue-159-shell-escaping into main
4 changed files with 208 additions and 11 deletions
+2 -4
View File
@@ -29,6 +29,7 @@ backend-specific and lives on concrete subclasses (see
from __future__ import annotations
import shlex
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
@@ -207,10 +208,7 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
"mkdir -p /git",
]
for u in upstreams:
# Single-quote args so URL/path content (containing : and /)
# passes through ash unmangled. Names came through the manifest
# validator so they don't contain a single quote.
lines.append(f"init_repo '{u.name}' '{u.upstream_url}'")
lines.append(f"init_repo {shlex.quote(u.name)} {shlex.quote(u.upstream_url)}")
lines.extend([
"",
"exec git daemon \\",
+12
View File
@@ -47,6 +47,7 @@ from __future__ import annotations
import ipaddress
import os
import re
from dataclasses import dataclass, field, replace
from pathlib import Path
from typing import Mapping, cast
@@ -60,6 +61,12 @@ class ManifestError(Exception):
"""A manifest file (or the manifest tree) is invalid."""
# Shell-safe characters for git-gate repo names. Names are embedded in
# the generated entrypoint shell script (shlex.quote is the primary
# defence; this regex is belt-and-suspenders and documents intent).
_GIT_NAME_RE = re.compile(r"^[A-Za-z0-9._-]+$")
def _empty_str_dict() -> dict[str, str]:
return {}
@@ -103,6 +110,11 @@ class GitEntry:
raise ManifestError(
f"bottle '{bottle_name}' git-gate.repos has an empty key"
)
if not _GIT_NAME_RE.match(repo_name):
raise ManifestError(
f"bottle '{bottle_name}' git-gate.repos name {repo_name!r} is invalid; "
f"allowed characters: A-Z a-z 0-9 . _ -"
)
label = f"git-gate.repos[{repo_name!r}]"
d = _as_json_object(raw, f"bottle '{bottle_name}' {label}")
for k in d:
+149 -7
View File
@@ -76,14 +76,18 @@ class TestEntrypointRender(unittest.TestCase):
)
script = git_gate_render_entrypoint(ups)
self.assertIn("#!/bin/sh", script)
self.assertIn(
"init_repo 'bot-bottle' "
"'ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git'",
script,
# shlex.quote leaves safe strings unquoted; verify the call tokens.
import shlex as _shlex
lines_with_init = [l for l in script.splitlines() if l.startswith("init_repo ")]
self.assertEqual(2, len(lines_with_init))
self.assertEqual(
["init_repo", "bot-bottle",
"ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git"],
_shlex.split(lines_with_init[0]),
)
self.assertIn(
"init_repo 'foo' 'ssh://git@github.com/didericis/foo.git'",
script,
self.assertEqual(
["init_repo", "foo", "ssh://git@github.com/didericis/foo.git"],
_shlex.split(lines_with_init[1]),
)
# Daemon line is what keeps PID 1 alive.
self.assertIn("exec git daemon", script)
@@ -108,6 +112,46 @@ class TestEntrypointRender(unittest.TestCase):
self.assertNotIn("init_repo '", script)
self.assertIn("exec git daemon", script)
def test_single_quote_in_upstream_url_is_escaped(self):
ups = (GitGateUpstream(
name="myrepo",
upstream_url="ssh://git@host/path'with'quotes.git",
upstream_host="host",
upstream_port="22",
identity_file="/key",
known_host_key="",
),)
script = git_gate_render_entrypoint(ups)
# The raw single-quoted form would break the shell script;
# shlex.quote must produce something safe.
self.assertNotIn(
"init_repo 'myrepo' 'ssh://git@host/path'with'quotes.git'",
script,
)
self.assertIn("init_repo", script)
self.assertIn("path", script)
def test_space_and_semicolon_in_upstream_url_are_escaped(self):
import shlex as _shlex
raw_url = "ssh://git@host/path with spaces;evil.git"
ups = (GitGateUpstream(
name="myrepo",
upstream_url=raw_url,
upstream_host="host",
upstream_port="22",
identity_file="/key",
known_host_key="",
),)
script = git_gate_render_entrypoint(ups)
# Skip the function-definition line "init_repo() {"; match the call.
line = next(l for l in script.splitlines() if l.startswith("init_repo "))
# Re-parsing via shlex must recover exactly the original URL with no
# shell injection — three tokens total.
tokens = _shlex.split(line)
self.assertEqual(3, len(tokens))
self.assertEqual("myrepo", tokens[1])
self.assertEqual(raw_url, tokens[2])
class TestHookRender(unittest.TestCase):
def test_pre_receive_hook_has_two_phases(self):
@@ -243,5 +287,103 @@ class TestPrepare(unittest.TestCase):
self.assertIn("exec git daemon", content)
class TestShellEscaping(unittest.TestCase):
"""Regression tests: all three render functions must produce syntactically
valid sh code even when names and upstream URLs contain shell-special
characters. Tests construct GitGateUpstream directly — bypassing manifest
name validation — so the rendering layer is exercised in isolation."""
_MALICIOUS_URL_CASES = [
("single_quote", "ssh://git@host/path'with'quotes.git"),
("double_quote", 'ssh://git@host/path"with"quotes.git'),
("space", "ssh://git@host/path with spaces.git"),
("semicolon", "ssh://git@host/path;evil.git"),
("newline", "ssh://git@host/path\nwith\nnewlines.git"),
("backtick", "ssh://git@host/path`whoami`.git"),
]
_MALICIOUS_NAME_CASES = [
("single_quote", "repo'name"),
("double_quote", 'repo"name'),
("space", "repo name"),
("semicolon", "repo;name"),
("newline", "repo\nname"),
("backtick", "repo`name"),
]
def _make_upstream(self, url: str, name: str = "myrepo") -> GitGateUpstream:
return GitGateUpstream(
name=name,
upstream_url=url,
upstream_host="host",
upstream_port="22",
identity_file="/key",
known_host_key="",
)
def _assert_valid_sh(self, script: str, label: str = "") -> None:
import subprocess
fd, path = tempfile.mkstemp(suffix=".sh")
try:
with os.fdopen(fd, "w") as f:
f.write(script)
result = subprocess.run(
["sh", "-n", path], capture_output=True, text=True,
)
self.assertEqual(
0, result.returncode,
f"sh -n failed{(' for ' + label) if label else ''}: {result.stderr}",
)
finally:
os.unlink(path)
def test_hook_renders_valid_sh(self):
self._assert_valid_sh(git_gate_render_hook(), "pre-receive hook")
def test_access_hook_renders_valid_sh(self):
self._assert_valid_sh(git_gate_render_access_hook(), "access hook")
def test_entrypoint_with_pathological_upstream_url_renders_valid_sh(self):
for label, url in self._MALICIOUS_URL_CASES:
with self.subTest(char=label):
script = git_gate_render_entrypoint((self._make_upstream(url),))
self._assert_valid_sh(script, label)
def test_entrypoint_upstream_url_value_preserved_after_quoting(self):
import shlex as _shlex
for label, url in self._MALICIOUS_URL_CASES:
with self.subTest(char=label):
script = git_gate_render_entrypoint((self._make_upstream(url),))
# The quoted form of the URL must appear verbatim in the script so
# the shell reconstructs exactly the original value at runtime.
expected = f"init_repo {_shlex.quote('myrepo')} {_shlex.quote(url)}"
self.assertIn(
expected, script,
f"{label}: expected quoted form not found in script",
)
def test_entrypoint_with_pathological_name_renders_valid_sh(self):
for label, name in self._MALICIOUS_NAME_CASES:
with self.subTest(char=label):
script = git_gate_render_entrypoint((
self._make_upstream("ssh://git@github.com/foo/bar.git", name=name),
))
self._assert_valid_sh(script, label)
def test_entrypoint_name_value_preserved_after_quoting(self):
import shlex as _shlex
url = "ssh://git@github.com/foo/bar.git"
for label, name in self._MALICIOUS_NAME_CASES:
with self.subTest(char=label):
script = git_gate_render_entrypoint((
self._make_upstream(url, name=name),
))
expected = f"init_repo {_shlex.quote(name)} {_shlex.quote(url)}"
self.assertIn(
expected, script,
f"{label}: expected quoted form not found in script",
)
if __name__ == "__main__":
unittest.main()
+45
View File
@@ -185,6 +185,51 @@ class TestGitEntryCrossValidation(unittest.TestCase):
"agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
})
def test_name_with_single_quote_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest({
"o'reilly": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
def test_name_with_space_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest({
"my repo": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
def test_name_with_semicolon_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest({
"foo;bar": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
def test_name_with_dollar_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest({
"foo$bar": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
def test_valid_name_with_dots_and_hyphens_accepted(self):
m = Manifest.from_json_obj(_manifest({
"my.repo-name_1": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
self.assertEqual("my.repo-name_1", m.bottles["dev"].git[0].Name)
def test_legacy_git_key_dies_with_hint(self):
msg = ""
try: