test(git-gate): shell-escaping regression tests (issue #159) #167

Closed
didericis-claude wants to merge 2 commits from regression/issue-159-shell-escaping into main
4 changed files with 208 additions and 11 deletions
+2 -4
View File
@@ -29,6 +29,7 @@ backend-specific and lives on concrete subclasses (see
from __future__ import annotations from __future__ import annotations
import shlex
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@@ -207,10 +208,7 @@ def git_gate_render_entrypoint(upstreams: tuple[GitGateUpstream, ...]) -> str:
"mkdir -p /git", "mkdir -p /git",
] ]
for u in upstreams: for u in upstreams:
# Single-quote args so URL/path content (containing : and /) lines.append(f"init_repo {shlex.quote(u.name)} {shlex.quote(u.upstream_url)}")
# passes through ash unmangled. Names came through the manifest
# validator so they don't contain a single quote.
lines.append(f"init_repo '{u.name}' '{u.upstream_url}'")
lines.extend([ lines.extend([
"", "",
"exec git daemon \\", "exec git daemon \\",
+12
View File
@@ -47,6 +47,7 @@ from __future__ import annotations
import ipaddress import ipaddress
import os import os
import re
from dataclasses import dataclass, field, replace from dataclasses import dataclass, field, replace
from pathlib import Path from pathlib import Path
from typing import Mapping, cast from typing import Mapping, cast
@@ -60,6 +61,12 @@ class ManifestError(Exception):
"""A manifest file (or the manifest tree) is invalid.""" """A manifest file (or the manifest tree) is invalid."""
# Shell-safe characters for git-gate repo names. Names are embedded in
# the generated entrypoint shell script (shlex.quote is the primary
# defence; this regex is belt-and-suspenders and documents intent).
_GIT_NAME_RE = re.compile(r"^[A-Za-z0-9._-]+$")
def _empty_str_dict() -> dict[str, str]: def _empty_str_dict() -> dict[str, str]:
return {} return {}
@@ -103,6 +110,11 @@ class GitEntry:
raise ManifestError( raise ManifestError(
f"bottle '{bottle_name}' git-gate.repos has an empty key" f"bottle '{bottle_name}' git-gate.repos has an empty key"
) )
if not _GIT_NAME_RE.match(repo_name):
raise ManifestError(
f"bottle '{bottle_name}' git-gate.repos name {repo_name!r} is invalid; "
f"allowed characters: A-Z a-z 0-9 . _ -"
)
label = f"git-gate.repos[{repo_name!r}]" label = f"git-gate.repos[{repo_name!r}]"
d = _as_json_object(raw, f"bottle '{bottle_name}' {label}") d = _as_json_object(raw, f"bottle '{bottle_name}' {label}")
for k in d: for k in d:
+149 -7
View File
@@ -76,14 +76,18 @@ class TestEntrypointRender(unittest.TestCase):
) )
script = git_gate_render_entrypoint(ups) script = git_gate_render_entrypoint(ups)
self.assertIn("#!/bin/sh", script) self.assertIn("#!/bin/sh", script)
self.assertIn( # shlex.quote leaves safe strings unquoted; verify the call tokens.
"init_repo 'bot-bottle' " import shlex as _shlex
"'ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git'", lines_with_init = [l for l in script.splitlines() if l.startswith("init_repo ")]
script, self.assertEqual(2, len(lines_with_init))
self.assertEqual(
["init_repo", "bot-bottle",
"ssh://git@gitea.dideric.is:30009/didericis/bot-bottle.git"],
_shlex.split(lines_with_init[0]),
) )
self.assertIn( self.assertEqual(
"init_repo 'foo' 'ssh://git@github.com/didericis/foo.git'", ["init_repo", "foo", "ssh://git@github.com/didericis/foo.git"],
script, _shlex.split(lines_with_init[1]),
) )
# Daemon line is what keeps PID 1 alive. # Daemon line is what keeps PID 1 alive.
self.assertIn("exec git daemon", script) self.assertIn("exec git daemon", script)
@@ -108,6 +112,46 @@ class TestEntrypointRender(unittest.TestCase):
self.assertNotIn("init_repo '", script) self.assertNotIn("init_repo '", script)
self.assertIn("exec git daemon", script) self.assertIn("exec git daemon", script)
def test_single_quote_in_upstream_url_is_escaped(self):
ups = (GitGateUpstream(
name="myrepo",
upstream_url="ssh://git@host/path'with'quotes.git",
upstream_host="host",
upstream_port="22",
identity_file="/key",
known_host_key="",
),)
script = git_gate_render_entrypoint(ups)
# The raw single-quoted form would break the shell script;
# shlex.quote must produce something safe.
self.assertNotIn(
"init_repo 'myrepo' 'ssh://git@host/path'with'quotes.git'",
script,
)
self.assertIn("init_repo", script)
self.assertIn("path", script)
def test_space_and_semicolon_in_upstream_url_are_escaped(self):
import shlex as _shlex
raw_url = "ssh://git@host/path with spaces;evil.git"
ups = (GitGateUpstream(
name="myrepo",
upstream_url=raw_url,
upstream_host="host",
upstream_port="22",
identity_file="/key",
known_host_key="",
),)
script = git_gate_render_entrypoint(ups)
# Skip the function-definition line "init_repo() {"; match the call.
line = next(l for l in script.splitlines() if l.startswith("init_repo "))
# Re-parsing via shlex must recover exactly the original URL with no
# shell injection — three tokens total.
tokens = _shlex.split(line)
self.assertEqual(3, len(tokens))
self.assertEqual("myrepo", tokens[1])
self.assertEqual(raw_url, tokens[2])
class TestHookRender(unittest.TestCase): class TestHookRender(unittest.TestCase):
def test_pre_receive_hook_has_two_phases(self): def test_pre_receive_hook_has_two_phases(self):
@@ -243,5 +287,103 @@ class TestPrepare(unittest.TestCase):
self.assertIn("exec git daemon", content) self.assertIn("exec git daemon", content)
class TestShellEscaping(unittest.TestCase):
"""Regression tests: all three render functions must produce syntactically
valid sh code even when names and upstream URLs contain shell-special
characters. Tests construct GitGateUpstream directly — bypassing manifest
name validation — so the rendering layer is exercised in isolation."""
_MALICIOUS_URL_CASES = [
("single_quote", "ssh://git@host/path'with'quotes.git"),
("double_quote", 'ssh://git@host/path"with"quotes.git'),
("space", "ssh://git@host/path with spaces.git"),
("semicolon", "ssh://git@host/path;evil.git"),
("newline", "ssh://git@host/path\nwith\nnewlines.git"),
("backtick", "ssh://git@host/path`whoami`.git"),
]
_MALICIOUS_NAME_CASES = [
("single_quote", "repo'name"),
("double_quote", 'repo"name'),
("space", "repo name"),
("semicolon", "repo;name"),
("newline", "repo\nname"),
("backtick", "repo`name"),
]
def _make_upstream(self, url: str, name: str = "myrepo") -> GitGateUpstream:
return GitGateUpstream(
name=name,
upstream_url=url,
upstream_host="host",
upstream_port="22",
identity_file="/key",
known_host_key="",
)
def _assert_valid_sh(self, script: str, label: str = "") -> None:
import subprocess
fd, path = tempfile.mkstemp(suffix=".sh")
try:
with os.fdopen(fd, "w") as f:
f.write(script)
result = subprocess.run(
["sh", "-n", path], capture_output=True, text=True,
)
self.assertEqual(
0, result.returncode,
f"sh -n failed{(' for ' + label) if label else ''}: {result.stderr}",
)
finally:
os.unlink(path)
def test_hook_renders_valid_sh(self):
self._assert_valid_sh(git_gate_render_hook(), "pre-receive hook")
def test_access_hook_renders_valid_sh(self):
self._assert_valid_sh(git_gate_render_access_hook(), "access hook")
def test_entrypoint_with_pathological_upstream_url_renders_valid_sh(self):
for label, url in self._MALICIOUS_URL_CASES:
with self.subTest(char=label):
script = git_gate_render_entrypoint((self._make_upstream(url),))
self._assert_valid_sh(script, label)
def test_entrypoint_upstream_url_value_preserved_after_quoting(self):
import shlex as _shlex
for label, url in self._MALICIOUS_URL_CASES:
with self.subTest(char=label):
script = git_gate_render_entrypoint((self._make_upstream(url),))
# The quoted form of the URL must appear verbatim in the script so
# the shell reconstructs exactly the original value at runtime.
expected = f"init_repo {_shlex.quote('myrepo')} {_shlex.quote(url)}"
self.assertIn(
expected, script,
f"{label}: expected quoted form not found in script",
)
def test_entrypoint_with_pathological_name_renders_valid_sh(self):
for label, name in self._MALICIOUS_NAME_CASES:
with self.subTest(char=label):
script = git_gate_render_entrypoint((
self._make_upstream("ssh://git@github.com/foo/bar.git", name=name),
))
self._assert_valid_sh(script, label)
def test_entrypoint_name_value_preserved_after_quoting(self):
import shlex as _shlex
url = "ssh://git@github.com/foo/bar.git"
for label, name in self._MALICIOUS_NAME_CASES:
with self.subTest(char=label):
script = git_gate_render_entrypoint((
self._make_upstream(url, name=name),
))
expected = f"init_repo {_shlex.quote(name)} {_shlex.quote(url)}"
self.assertIn(
expected, script,
f"{label}: expected quoted form not found in script",
)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
+45
View File
@@ -185,6 +185,51 @@ class TestGitEntryCrossValidation(unittest.TestCase):
"agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}}, "agents": {"demo": {"skills": [], "prompt": "", "bottle": "dev"}},
}) })
def test_name_with_single_quote_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest({
"o'reilly": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
def test_name_with_space_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest({
"my repo": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
def test_name_with_semicolon_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest({
"foo;bar": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
def test_name_with_dollar_dies(self):
with self.assertRaises(ManifestError):
Manifest.from_json_obj(_manifest({
"foo$bar": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
def test_valid_name_with_dots_and_hyphens_accepted(self):
m = Manifest.from_json_obj(_manifest({
"my.repo-name_1": {
"url": "ssh://git@github.com/foo.git",
"identity": "/dev/null",
},
}))
self.assertEqual("my.repo-name_1", m.bottles["dev"].git[0].Name)
def test_legacy_git_key_dies_with_hint(self): def test_legacy_git_key_dies_with_hint(self):
msg = "" msg = ""
try: try: