fix: async subprocess + httpx — no more event loop blocking during scans
This commit is contained in:
@@ -66,7 +66,7 @@ async def harvest(
|
|||||||
scan.status = ScanStatus.SCANNING.value
|
scan.status = ScanStatus.SCANNING.value
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
downloaded = download_asset(download_url, tmpdir)
|
downloaded = await download_asset(download_url, tmpdir)
|
||||||
if not downloaded:
|
if not downloaded:
|
||||||
scan.status = ScanStatus.FAILED.value
|
scan.status = ScanStatus.FAILED.value
|
||||||
scan.error_message = "Download failed"
|
scan.error_message = "Download failed"
|
||||||
@@ -78,7 +78,7 @@ async def harvest(
|
|||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
log.info("Scanning %s==%s", package_name, package_version)
|
log.info("Scanning %s==%s", package_name, package_version)
|
||||||
result = scan_package(downloaded, ecosystem)
|
result = await scan_package(downloaded, ecosystem)
|
||||||
|
|
||||||
findings_list = result.get("findings", [])
|
findings_list = result.get("findings", [])
|
||||||
|
|
||||||
|
|||||||
@@ -1,25 +1,14 @@
|
|||||||
"""Sonatype Nexus REST API client."""
|
"""Sonatype Nexus REST API client using httpx async."""
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import subprocess
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
from guarddog_nexus.config import config
|
from guarddog_nexus.config import config
|
||||||
from guarddog_nexus.logging_setup import log
|
from guarddog_nexus.logging_setup import log
|
||||||
|
|
||||||
SUPPORTED_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip")
|
SUPPORTED_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip")
|
||||||
PACKAGE_FILE_PATTERNS = ("packages/",)
|
|
||||||
|
|
||||||
|
|
||||||
def get_ecosystem_from_format(fmt: str) -> str | None:
|
|
||||||
mapping = {
|
|
||||||
"pypi": "pypi",
|
|
||||||
"npm": "npm",
|
|
||||||
"rubygems": "rubygems",
|
|
||||||
"go": "go",
|
|
||||||
"raw": None,
|
|
||||||
}
|
|
||||||
return mapping.get(fmt.lower() if fmt else "")
|
|
||||||
|
|
||||||
|
|
||||||
def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
|
def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
|
||||||
@@ -33,31 +22,28 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def download_asset(download_url: str, dest_dir: str) -> str | None:
|
async def download_asset(download_url: str, dest_dir: str) -> str | None:
|
||||||
"""Download an asset from Nexus using curl (available in Docker)."""
|
"""Download an asset from Nexus using async httpx."""
|
||||||
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
|
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
|
||||||
try:
|
|
||||||
result = subprocess.run(
|
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
|
||||||
[
|
async with httpx.AsyncClient(auth=auth, timeout=120, follow_redirects=True) as client:
|
||||||
"curl",
|
try:
|
||||||
"-sfSL",
|
response = await client.get(download_url)
|
||||||
"-u",
|
response.raise_for_status()
|
||||||
f"{config.nexus_username}:{config.nexus_password}",
|
with open(dest_path, "wb") as f:
|
||||||
"-o",
|
f.write(response.content)
|
||||||
dest_path,
|
return dest_path
|
||||||
download_url,
|
except Exception as e:
|
||||||
],
|
log.warning("Failed to download %s: %s", download_url, e)
|
||||||
capture_output=True,
|
|
||||||
text=True,
|
|
||||||
timeout=120,
|
|
||||||
)
|
|
||||||
if result.returncode != 0:
|
|
||||||
log.warning("Failed to download %s: %s", download_url, result.stderr)
|
|
||||||
return None
|
return None
|
||||||
return dest_path
|
|
||||||
except Exception as e:
|
|
||||||
log.error("Download error for %s: %s", download_url, e)
|
async def nexus_get(path: str) -> httpx.Response:
|
||||||
return None
|
"""Make an authenticated GET request to Nexus REST API."""
|
||||||
|
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
|
||||||
|
async with httpx.AsyncClient(auth=auth, timeout=30) as client:
|
||||||
|
return await client.get(f"{config.nexus_url.rstrip('/')}{path}")
|
||||||
|
|
||||||
|
|
||||||
def compute_sha256(filepath: str) -> str:
|
def compute_sha256(filepath: str) -> str:
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
"""GuardDog CLI integration via subprocess."""
|
"""GuardDog CLI integration via asyncio subprocess."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from guarddog_nexus.config import config
|
from guarddog_nexus.config import config
|
||||||
from guarddog_nexus.logging_setup import log
|
from guarddog_nexus.logging_setup import log
|
||||||
@@ -10,39 +10,33 @@ from guarddog_nexus.logging_setup import log
|
|||||||
GUARDDOG_BIN = shutil.which("guarddog") or "guarddog"
|
GUARDDOG_BIN = shutil.which("guarddog") or "guarddog"
|
||||||
|
|
||||||
|
|
||||||
def scan_package(filepath: str, ecosystem: str = "pypi") -> dict:
|
async def scan_package(filepath: str, ecosystem: str = "pypi") -> dict:
|
||||||
"""Run guarddog scan on a downloaded package file. Returns normalized dict."""
|
"""Run guarddog scan on a downloaded package file. Returns normalized dict."""
|
||||||
cmd = [
|
cmd = [GUARDDOG_BIN, ecosystem, "scan", filepath, "--output-format", "json"]
|
||||||
GUARDDOG_BIN,
|
|
||||||
ecosystem,
|
|
||||||
"scan",
|
|
||||||
filepath,
|
|
||||||
"--output-format",
|
|
||||||
"json",
|
|
||||||
]
|
|
||||||
|
|
||||||
log.info("Running: %s", " ".join(cmd))
|
log.info("Running: %s", " ".join(cmd))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
proc = await asyncio.create_subprocess_exec(
|
||||||
cmd,
|
*cmd,
|
||||||
capture_output=True,
|
stdout=asyncio.subprocess.PIPE,
|
||||||
text=True,
|
stderr=asyncio.subprocess.PIPE,
|
||||||
timeout=config.scan_timeout_seconds,
|
|
||||||
)
|
)
|
||||||
except subprocess.TimeoutExpired:
|
stdout, stderr = await asyncio.wait_for(
|
||||||
|
proc.communicate(), timeout=config.scan_timeout_seconds
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
log.error("GuardDog scan timed out for %s", filepath)
|
log.error("GuardDog scan timed out for %s", filepath)
|
||||||
return {"findings": [], "errors": ["timeout"]}
|
return {"findings": [], "errors": ["timeout"]}
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
log.error("GuardDog binary not found at %s", GUARDDOG_BIN)
|
log.error("GuardDog binary not found at %s", GUARDDOG_BIN)
|
||||||
return {"findings": [], "errors": ["guarddog_not_found"]}
|
return {"findings": [], "errors": ["guarddog_not_found"]}
|
||||||
|
|
||||||
if result.returncode not in (0, 1):
|
if proc.returncode not in (0, 1):
|
||||||
log.error("GuardDog exited %d: %s", result.returncode, result.stderr)
|
log.error("GuardDog exited %d: %s", proc.returncode, stderr.decode())
|
||||||
return {"findings": [], "errors": [result.stderr.strip()]}
|
return {"findings": [], "errors": [stderr.decode().strip()]}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = json.loads(result.stdout)
|
data = json.loads(stdout.decode())
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
log.error("GuardDog returned invalid JSON for %s", filepath)
|
log.error("GuardDog returned invalid JSON for %s", filepath)
|
||||||
return {"findings": [], "errors": ["json_parse_error"]}
|
return {"findings": [], "errors": ["json_parse_error"]}
|
||||||
|
|||||||
@@ -115,23 +115,16 @@ async def nexus_webhook(
|
|||||||
|
|
||||||
async def _scan_component(repository: str, name: str, version: str):
|
async def _scan_component(repository: str, name: str, version: str):
|
||||||
"""Look up component assets via Nexus API, then scan each package file."""
|
"""Look up component assets via Nexus API, then scan each package file."""
|
||||||
import subprocess
|
|
||||||
|
|
||||||
api_url = (
|
from guarddog_nexus.nexus_client import nexus_get
|
||||||
f"{config.nexus_url.rstrip('/')}/service/rest/v1/search"
|
|
||||||
f"?repository={repository}&name={name}&version={version}&format=pypi"
|
api_path = (
|
||||||
|
f"/service/rest/v1/search?repository={repository}&name={name}&version={version}&format=pypi"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
resp = await nexus_get(api_path)
|
||||||
["curl", "-sf", "-u", f"{config.nexus_username}:{config.nexus_password}", api_url],
|
resp.raise_for_status()
|
||||||
capture_output=True,
|
data = resp.json()
|
||||||
text=True,
|
|
||||||
timeout=30,
|
|
||||||
)
|
|
||||||
if result.returncode != 0:
|
|
||||||
log.warning("Component lookup failed for %s==%s: %s", name, version, result.stderr)
|
|
||||||
return
|
|
||||||
data = json.loads(result.stdout)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.warning("Component lookup error for %s==%s: %s", name, version, e)
|
log.warning("Component lookup error for %s==%s: %s", name, version, e)
|
||||||
return
|
return
|
||||||
|
|||||||
Reference in New Issue
Block a user