fix: async subprocess + httpx — no more event loop blocking during scans

This commit is contained in:
Marker689
2026-05-09 05:58:55 +03:00
parent e577f1944c
commit 41a8745198
4 changed files with 48 additions and 75 deletions

View File

@@ -66,7 +66,7 @@ async def harvest(
scan.status = ScanStatus.SCANNING.value scan.status = ScanStatus.SCANNING.value
await session.commit() await session.commit()
downloaded = download_asset(download_url, tmpdir) downloaded = await download_asset(download_url, tmpdir)
if not downloaded: if not downloaded:
scan.status = ScanStatus.FAILED.value scan.status = ScanStatus.FAILED.value
scan.error_message = "Download failed" scan.error_message = "Download failed"
@@ -78,7 +78,7 @@ async def harvest(
await session.commit() await session.commit()
log.info("Scanning %s==%s", package_name, package_version) log.info("Scanning %s==%s", package_name, package_version)
result = scan_package(downloaded, ecosystem) result = await scan_package(downloaded, ecosystem)
findings_list = result.get("findings", []) findings_list = result.get("findings", [])

View File

@@ -1,25 +1,14 @@
"""Sonatype Nexus REST API client.""" """Sonatype Nexus REST API client using httpx async."""
import hashlib import hashlib
import os import os
import subprocess
import httpx
from guarddog_nexus.config import config from guarddog_nexus.config import config
from guarddog_nexus.logging_setup import log from guarddog_nexus.logging_setup import log
SUPPORTED_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip") SUPPORTED_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip")
PACKAGE_FILE_PATTERNS = ("packages/",)
def get_ecosystem_from_format(fmt: str) -> str | None:
mapping = {
"pypi": "pypi",
"npm": "npm",
"rubygems": "rubygems",
"go": "go",
"raw": None,
}
return mapping.get(fmt.lower() if fmt else "")
def extract_pypi_info(asset_path: str) -> tuple[str, str] | None: def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
@@ -33,33 +22,30 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
return None return None
def download_asset(download_url: str, dest_dir: str) -> str | None: async def download_asset(download_url: str, dest_dir: str) -> str | None:
"""Download an asset from Nexus using curl (available in Docker).""" """Download an asset from Nexus using async httpx."""
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0])) dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
async with httpx.AsyncClient(auth=auth, timeout=120, follow_redirects=True) as client:
try: try:
result = subprocess.run( response = await client.get(download_url)
[ response.raise_for_status()
"curl", with open(dest_path, "wb") as f:
"-sfSL", f.write(response.content)
"-u",
f"{config.nexus_username}:{config.nexus_password}",
"-o",
dest_path,
download_url,
],
capture_output=True,
text=True,
timeout=120,
)
if result.returncode != 0:
log.warning("Failed to download %s: %s", download_url, result.stderr)
return None
return dest_path return dest_path
except Exception as e: except Exception as e:
log.error("Download error for %s: %s", download_url, e) log.warning("Failed to download %s: %s", download_url, e)
return None return None
async def nexus_get(path: str) -> httpx.Response:
"""Make an authenticated GET request to Nexus REST API."""
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
async with httpx.AsyncClient(auth=auth, timeout=30) as client:
return await client.get(f"{config.nexus_url.rstrip('/')}{path}")
def compute_sha256(filepath: str) -> str: def compute_sha256(filepath: str) -> str:
h = hashlib.sha256() h = hashlib.sha256()
with open(filepath, "rb") as f: with open(filepath, "rb") as f:

View File

@@ -1,8 +1,8 @@
"""GuardDog CLI integration via subprocess.""" """GuardDog CLI integration via asyncio subprocess."""
import asyncio
import json import json
import shutil import shutil
import subprocess
from guarddog_nexus.config import config from guarddog_nexus.config import config
from guarddog_nexus.logging_setup import log from guarddog_nexus.logging_setup import log
@@ -10,39 +10,33 @@ from guarddog_nexus.logging_setup import log
GUARDDOG_BIN = shutil.which("guarddog") or "guarddog" GUARDDOG_BIN = shutil.which("guarddog") or "guarddog"
def scan_package(filepath: str, ecosystem: str = "pypi") -> dict: async def scan_package(filepath: str, ecosystem: str = "pypi") -> dict:
"""Run guarddog scan on a downloaded package file. Returns normalized dict.""" """Run guarddog scan on a downloaded package file. Returns normalized dict."""
cmd = [ cmd = [GUARDDOG_BIN, ecosystem, "scan", filepath, "--output-format", "json"]
GUARDDOG_BIN,
ecosystem,
"scan",
filepath,
"--output-format",
"json",
]
log.info("Running: %s", " ".join(cmd)) log.info("Running: %s", " ".join(cmd))
try: try:
result = subprocess.run( proc = await asyncio.create_subprocess_exec(
cmd, *cmd,
capture_output=True, stdout=asyncio.subprocess.PIPE,
text=True, stderr=asyncio.subprocess.PIPE,
timeout=config.scan_timeout_seconds,
) )
except subprocess.TimeoutExpired: stdout, stderr = await asyncio.wait_for(
proc.communicate(), timeout=config.scan_timeout_seconds
)
except asyncio.TimeoutError:
log.error("GuardDog scan timed out for %s", filepath) log.error("GuardDog scan timed out for %s", filepath)
return {"findings": [], "errors": ["timeout"]} return {"findings": [], "errors": ["timeout"]}
except FileNotFoundError: except FileNotFoundError:
log.error("GuardDog binary not found at %s", GUARDDOG_BIN) log.error("GuardDog binary not found at %s", GUARDDOG_BIN)
return {"findings": [], "errors": ["guarddog_not_found"]} return {"findings": [], "errors": ["guarddog_not_found"]}
if result.returncode not in (0, 1): if proc.returncode not in (0, 1):
log.error("GuardDog exited %d: %s", result.returncode, result.stderr) log.error("GuardDog exited %d: %s", proc.returncode, stderr.decode())
return {"findings": [], "errors": [result.stderr.strip()]} return {"findings": [], "errors": [stderr.decode().strip()]}
try: try:
data = json.loads(result.stdout) data = json.loads(stdout.decode())
except json.JSONDecodeError: except json.JSONDecodeError:
log.error("GuardDog returned invalid JSON for %s", filepath) log.error("GuardDog returned invalid JSON for %s", filepath)
return {"findings": [], "errors": ["json_parse_error"]} return {"findings": [], "errors": ["json_parse_error"]}

View File

@@ -115,23 +115,16 @@ async def nexus_webhook(
async def _scan_component(repository: str, name: str, version: str): async def _scan_component(repository: str, name: str, version: str):
"""Look up component assets via Nexus API, then scan each package file.""" """Look up component assets via Nexus API, then scan each package file."""
import subprocess
api_url = ( from guarddog_nexus.nexus_client import nexus_get
f"{config.nexus_url.rstrip('/')}/service/rest/v1/search"
f"?repository={repository}&name={name}&version={version}&format=pypi" api_path = (
f"/service/rest/v1/search?repository={repository}&name={name}&version={version}&format=pypi"
) )
try: try:
result = subprocess.run( resp = await nexus_get(api_path)
["curl", "-sf", "-u", f"{config.nexus_username}:{config.nexus_password}", api_url], resp.raise_for_status()
capture_output=True, data = resp.json()
text=True,
timeout=30,
)
if result.returncode != 0:
log.warning("Component lookup failed for %s==%s: %s", name, version, result.stderr)
return
data = json.loads(result.stdout)
except Exception as e: except Exception as e:
log.warning("Component lookup error for %s==%s: %s", name, version, e) log.warning("Component lookup error for %s==%s: %s", name, version, e)
return return