feat: guarddog-nexus — webhook-based PyPI scanner with web UI
This commit is contained in:
129
guarddog_nexus/harvester.py
Normal file
129
guarddog_nexus/harvester.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""Harvester: download a package from Nexus, scan it, store results."""
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.logging_setup import log
|
||||
from guarddog_nexus.models import Finding, Scan, ScanStatus
|
||||
from guarddog_nexus.nexus_client import (
|
||||
SUPPORTED_EXTENSIONS,
|
||||
compute_sha256,
|
||||
download_asset,
|
||||
extract_pypi_info,
|
||||
)
|
||||
from guarddog_nexus.scanner import scan_package
|
||||
|
||||
|
||||
async def harvest(
|
||||
download_url: str,
|
||||
repository: str,
|
||||
format_: str,
|
||||
asset_path: str,
|
||||
session: AsyncSession,
|
||||
) -> Scan | None:
|
||||
"""Download, scan, and store results for a single package asset."""
|
||||
ecosystem = "pypi" if format_ in ("pypi",) else format_
|
||||
|
||||
filename = os.path.basename(download_url.split("?")[0])
|
||||
if not filename.endswith(SUPPORTED_EXTENSIONS):
|
||||
log.info("Skipping non-package asset: %s", filename)
|
||||
return None
|
||||
|
||||
info = extract_pypi_info(asset_path)
|
||||
if info is None:
|
||||
log.warning("Could not parse package info from path: %s", asset_path)
|
||||
return None
|
||||
|
||||
package_name, package_version = info
|
||||
|
||||
existing = await session.scalar(
|
||||
select(Scan.id).where(
|
||||
Scan.package_name == package_name,
|
||||
Scan.package_version == package_version,
|
||||
Scan.repository == repository,
|
||||
)
|
||||
)
|
||||
if existing:
|
||||
log.info("Already scanned %s==%s, skipping", package_name, package_version)
|
||||
return None
|
||||
|
||||
scan = Scan(
|
||||
package_name=package_name,
|
||||
package_version=package_version,
|
||||
ecosystem=ecosystem,
|
||||
repository=repository,
|
||||
nexus_asset_url=download_url,
|
||||
status=ScanStatus.PENDING.value,
|
||||
)
|
||||
session.add(scan)
|
||||
await session.commit()
|
||||
await session.refresh(scan)
|
||||
|
||||
os.makedirs(config.temp_dir, exist_ok=True)
|
||||
tmpdir = tempfile.mkdtemp(dir=config.temp_dir)
|
||||
|
||||
try:
|
||||
scan.status = ScanStatus.SCANNING.value
|
||||
await session.commit()
|
||||
|
||||
downloaded = download_asset(download_url, tmpdir)
|
||||
if not downloaded:
|
||||
scan.status = ScanStatus.FAILED.value
|
||||
scan.error_message = "Download failed"
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
return scan
|
||||
|
||||
scan.sha256 = compute_sha256(downloaded)
|
||||
await session.commit()
|
||||
|
||||
log.info("Scanning %s==%s", package_name, package_version)
|
||||
result = scan_package(downloaded, ecosystem)
|
||||
|
||||
findings_list = result.get("findings", [])
|
||||
|
||||
for fdata in findings_list:
|
||||
finding = Finding(
|
||||
scan_id=scan.id,
|
||||
rule=fdata["rule"],
|
||||
severity=fdata["severity"],
|
||||
message=fdata["message"],
|
||||
location=fdata.get("location"),
|
||||
)
|
||||
session.add(finding)
|
||||
|
||||
scan.total_findings = len(findings_list)
|
||||
scan.flagged = len(findings_list) > 0
|
||||
scan.status = ScanStatus.COMPLETED.value
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
|
||||
if scan.flagged:
|
||||
log.warning(
|
||||
"FLAGGED %s==%s: %d findings in repo %s",
|
||||
package_name,
|
||||
package_version,
|
||||
scan.total_findings,
|
||||
repository,
|
||||
)
|
||||
|
||||
log.info(
|
||||
"Scan complete: %s==%s (%d findings)",
|
||||
package_name,
|
||||
package_version,
|
||||
scan.total_findings,
|
||||
)
|
||||
return scan
|
||||
|
||||
except Exception as e:
|
||||
log.error("Scan failed for %s==%s: %s", package_name, package_version, e)
|
||||
scan.status = ScanStatus.FAILED.value
|
||||
scan.error_message = str(e)[:1000]
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
return scan
|
||||
Reference in New Issue
Block a user