From 67433214638a5744994a1aefdee9ca91ad67f80a Mon Sep 17 00:00:00 2001
From: Marker689
Date: Mon, 11 May 2026 19:38:15 +0300
Subject: [PATCH] feat: SSRF protection via NEXUS_ALLOWED_HOSTS, _env_int validation warnings

---
 .env.example                 |  1 +
 AGENTS.md                    |  1 +
 guarddog_nexus/config.py     | 24 +++++++++++++++++++++++-
 guarddog_nexus/core/nexus.py | 24 +++++++++++++++++++++++-
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/.env.example b/.env.example
index f6cdc20..6f71dea 100644
--- a/.env.example
+++ b/.env.example
@@ -1,5 +1,6 @@
 # Nexus connection
 NEXUS_URL=http://nexus:8081
+# NEXUS_ALLOWED_HOSTS=nexus,nexus.local # default: host from NEXUS_URL
 
 # Database
 DATABASE_PATH=/data/guarddog.db
diff --git a/AGENTS.md b/AGENTS.md
index 1867a73..54eeb7e 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -94,6 +94,7 @@ All via environment variables, defined in `config.py`. Key ones:
 | Variable | Default | Notes |
 |----------|---------|-------|
 | `NEXUS_URL` | `http://localhost:8081` | |
+| `NEXUS_ALLOWED_HOSTS` | host from `NEXUS_URL` | comma-separated, SSRF protection |
 | `WEBHOOK_SECRET` | `""` | HMAC-SHA256 validation |
 | `MAX_CONCURRENT_SCANS` | `4` | asyncio.Semaphore for guarddog processes |
 | `LLM_ENABLED` | `0` | `1` to enable analysis |
diff --git a/guarddog_nexus/config.py b/guarddog_nexus/config.py
index 802c814..fc8643b 100644
--- a/guarddog_nexus/config.py
+++ b/guarddog_nexus/config.py
@@ -1,7 +1,8 @@
 """Configuration via environment variables."""
 
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+from urllib.parse import urlparse
 
 from guarddog_nexus.constants import (
     DEFAULT_MAX_CONCURRENT_SCANS,
@@ -21,13 +22,34 @@ def _env_int(name: str, default: int) -> int:
     try:
         return int(val)
     except ValueError:
+        import logging
+
+        logging.getLogger("guarddog_nexus").warning(
+            "Invalid value for %s=%r, using default %d", name, val, default
+        )
         return default
 
 
+def _resolve_allowed_hosts() -> list[str]:
+    """Return the lower-cased hostnames that asset downloads may target.
+
+    Uses the comma-separated ``NEXUS_ALLOWED_HOSTS`` when it names at least
+    one host; otherwise falls back to the host of ``NEXUS_URL``.
+    """
+    raw = os.getenv("NEXUS_ALLOWED_HOSTS", "")
+    # urlparse() lower-cases hostnames, so normalise entries to match.
+    hosts = [h.strip().lower() for h in raw.split(",") if h.strip()]
+    if hosts:
+        return hosts
+    parsed = urlparse(os.getenv("NEXUS_URL", "http://localhost:8081"))
+    return [parsed.hostname or "localhost"]
+
+
 @dataclass
 class Config:
     # Nexus connection
     nexus_url: str = os.getenv("NEXUS_URL", "http://localhost:8081")
+    nexus_allowed_hosts: list[str] = field(default_factory=lambda: _resolve_allowed_hosts())
     nexus_download_timeout: int = _env_int("NEXUS_DOWNLOAD_TIMEOUT_SECONDS", HTTP_TIMEOUT_DOWNLOAD)
     nexus_api_timeout: int = _env_int("NEXUS_API_TIMEOUT_SECONDS", HTTP_TIMEOUT_API)
 
diff --git a/guarddog_nexus/core/nexus.py b/guarddog_nexus/core/nexus.py
index c79fe8d..0934ebd 100644
--- a/guarddog_nexus/core/nexus.py
+++ b/guarddog_nexus/core/nexus.py
@@ -3,7 +3,7 @@
 import asyncio
 import hashlib
 import os
-from urllib.parse import unquote
+from urllib.parse import unquote, urlparse
 
 import httpx
 
@@ -15,6 +15,24 @@ from ..constants import (
 from ..logging_setup import log
 
 
+def _validate_download_url(url: str) -> bool:
+    """Return True if *url* is an http(s) URL whose host is allow-listed.
+
+    The host is compared case-insensitively against
+    ``config.nexus_allowed_hosts``. Malformed URLs and URLs without a host
+    are rejected rather than raising.
+    """
+    try:
+        parsed = urlparse(url)
+        host = parsed.hostname
+    except ValueError:
+        # urlparse() raises on malformed netlocs such as "http://[::1".
+        return False
+    if parsed.scheme not in ("http", "https") or not host:
+        return False
+    return host in {h.lower() for h in config.nexus_allowed_hosts}
+
+
 def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
     """Extract package name and version from a PyPI asset path.
 
@@ -101,6 +119,10 @@ def parse_package_path(path: str) -> tuple[str, str]:
 
 
 async def download_asset(download_url: str, dest_dir: str) -> str | None:
     """Download an asset from Nexus using async httpx."""
+    if not _validate_download_url(download_url):
+        log.warning("SSRF prevention: blocked download from %s", download_url)
+        return None
+
     dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
 
     async with httpx.AsyncClient(