diff --git a/guarddog_nexus/harvester.py b/guarddog_nexus/harvester.py index 8fbba9d..4cb6709 100644 --- a/guarddog_nexus/harvester.py +++ b/guarddog_nexus/harvester.py @@ -66,7 +66,7 @@ async def harvest( scan.status = ScanStatus.SCANNING.value await session.commit() - downloaded = download_asset(download_url, tmpdir) + downloaded = await download_asset(download_url, tmpdir) if not downloaded: scan.status = ScanStatus.FAILED.value scan.error_message = "Download failed" @@ -78,7 +78,7 @@ async def harvest( await session.commit() log.info("Scanning %s==%s", package_name, package_version) - result = scan_package(downloaded, ecosystem) + result = await scan_package(downloaded, ecosystem) findings_list = result.get("findings", []) diff --git a/guarddog_nexus/nexus_client.py b/guarddog_nexus/nexus_client.py index 8c81582..8d1e0a3 100644 --- a/guarddog_nexus/nexus_client.py +++ b/guarddog_nexus/nexus_client.py @@ -1,25 +1,14 @@ -"""Sonatype Nexus REST API client.""" +"""Sonatype Nexus REST API client using httpx async.""" import hashlib import os -import subprocess + +import httpx from guarddog_nexus.config import config from guarddog_nexus.logging_setup import log SUPPORTED_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip") -PACKAGE_FILE_PATTERNS = ("packages/",) - - -def get_ecosystem_from_format(fmt: str) -> str | None: - mapping = { - "pypi": "pypi", - "npm": "npm", - "rubygems": "rubygems", - "go": "go", - "raw": None, - } - return mapping.get(fmt.lower() if fmt else "") def extract_pypi_info(asset_path: str) -> tuple[str, str] | None: @@ -33,31 +22,28 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None: return None -def download_asset(download_url: str, dest_dir: str) -> str | None: - """Download an asset from Nexus using curl (available in Docker).""" +async def download_asset(download_url: str, dest_dir: str) -> str | None: + """Download an asset from Nexus using async httpx.""" dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0])) - try: - result = subprocess.run( - [ - "curl", - "-sfSL", - "-u", - f"{config.nexus_username}:{config.nexus_password}", - "-o", - dest_path, - download_url, - ], - capture_output=True, - text=True, - timeout=120, - ) - if result.returncode != 0: - log.warning("Failed to download %s: %s", download_url, result.stderr) + + auth = httpx.BasicAuth(config.nexus_username, config.nexus_password) + async with httpx.AsyncClient(auth=auth, timeout=120, follow_redirects=True) as client: + try: + response = await client.get(download_url) + response.raise_for_status() + with open(dest_path, "wb") as f: + f.write(response.content) + return dest_path + except Exception as e: + log.warning("Failed to download %s: %s", download_url, e) return None - return dest_path - except Exception as e: - log.error("Download error for %s: %s", download_url, e) - return None + + +async def nexus_get(path: str) -> httpx.Response: + """Make an authenticated GET request to Nexus REST API.""" + auth = httpx.BasicAuth(config.nexus_username, config.nexus_password) + async with httpx.AsyncClient(auth=auth, timeout=30) as client: + return await client.get(f"{config.nexus_url.rstrip('/')}{path}") def compute_sha256(filepath: str) -> str: diff --git a/guarddog_nexus/scanner.py b/guarddog_nexus/scanner.py index 0389041..360269d 100644 --- a/guarddog_nexus/scanner.py +++ b/guarddog_nexus/scanner.py @@ -1,8 +1,8 @@ -"""GuardDog CLI integration via subprocess.""" +"""GuardDog CLI integration via asyncio subprocess.""" +import asyncio import json import shutil -import subprocess from guarddog_nexus.config import config from guarddog_nexus.logging_setup import log @@ -10,39 +10,33 @@ from guarddog_nexus.logging_setup import log GUARDDOG_BIN = shutil.which("guarddog") or "guarddog" -def scan_package(filepath: str, ecosystem: str = "pypi") -> dict: +async def scan_package(filepath: str, ecosystem: str = "pypi") -> dict: """Run guarddog scan on a downloaded package file. Returns normalized dict.""" - cmd = [ - GUARDDOG_BIN, - ecosystem, - "scan", - filepath, - "--output-format", - "json", - ] - + cmd = [GUARDDOG_BIN, ecosystem, "scan", filepath, "--output-format", "json"] log.info("Running: %s", " ".join(cmd)) try: - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=config.scan_timeout_seconds, + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, ) - except subprocess.TimeoutExpired: + stdout, stderr = await asyncio.wait_for( + proc.communicate(), timeout=config.scan_timeout_seconds + ) + except asyncio.TimeoutError: log.error("GuardDog scan timed out for %s", filepath) return {"findings": [], "errors": ["timeout"]} except FileNotFoundError: log.error("GuardDog binary not found at %s", GUARDDOG_BIN) return {"findings": [], "errors": ["guarddog_not_found"]} - if result.returncode not in (0, 1): - log.error("GuardDog exited %d: %s", result.returncode, result.stderr) - return {"findings": [], "errors": [result.stderr.strip()]} + if proc.returncode not in (0, 1): + log.error("GuardDog exited %d: %s", proc.returncode, stderr.decode()) + return {"findings": [], "errors": [stderr.decode().strip()]} try: - data = json.loads(result.stdout) + data = json.loads(stdout.decode()) except json.JSONDecodeError: log.error("GuardDog returned invalid JSON for %s", filepath) return {"findings": [], "errors": ["json_parse_error"]} diff --git a/guarddog_nexus/webhooks.py b/guarddog_nexus/webhooks.py index e383a80..d0a9c1a 100644 --- a/guarddog_nexus/webhooks.py +++ b/guarddog_nexus/webhooks.py @@ -115,23 +115,16 @@ async def nexus_webhook( async def _scan_component(repository: str, name: str, version: str): """Look up component assets via Nexus API, then scan each package file.""" - import subprocess - api_url = ( - f"{config.nexus_url.rstrip('/')}/service/rest/v1/search" - f"?repository={repository}&name={name}&version={version}&format=pypi" + from guarddog_nexus.nexus_client import nexus_get + + api_path = ( + f"/service/rest/v1/search?repository={repository}&name={name}&version={version}&format=pypi" ) try: - result = subprocess.run( - ["curl", "-sf", "-u", f"{config.nexus_username}:{config.nexus_password}", api_url], - capture_output=True, - text=True, - timeout=30, - ) - if result.returncode != 0: - log.warning("Component lookup failed for %s==%s: %s", name, version, result.stderr) - return - data = json.loads(result.stdout) + resp = await nexus_get(api_path) + resp.raise_for_status() + data = resp.json() except Exception as e: log.warning("Component lookup error for %s==%s: %s", name, version, e) return