From 4ce99d3c85a3fabdd9ca63f9ffc029a2cd86fec4 Mon Sep 17 00:00:00 2001 From: Marker689 Date: Sat, 9 May 2026 04:48:10 +0300 Subject: [PATCH] =?UTF-8?q?feat:=20guarddog-nexus=20=E2=80=94=20webhook-ba?= =?UTF-8?q?sed=20PyPI=20scanner=20with=20web=20UI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 22 ++ docker-compose.yml | 44 ++++ guarddog_nexus/__init__.py | 0 guarddog_nexus/api/__init__.py | 0 guarddog_nexus/api/findings.py | 49 +++++ guarddog_nexus/api/packages.py | 122 +++++++++++ guarddog_nexus/api/scans.py | 120 +++++++++++ guarddog_nexus/harvester.py | 129 ++++++++++++ guarddog_nexus/logging_setup.py | 43 ++++ guarddog_nexus/main.py | 61 ++++++ guarddog_nexus/models.py | 58 ++++++ guarddog_nexus/nexus_client.py | 65 ++++++ guarddog_nexus/scanner.py | 74 +++++++ guarddog_nexus/static/style.css | 1 + guarddog_nexus/web/__init__.py | 0 guarddog_nexus/web/routes.py | 191 ++++++++++++++++++ guarddog_nexus/web/templates/base.html | 41 ++++ guarddog_nexus/web/templates/dashboard.html | 8 + .../web/templates/dashboard_stats.html | 56 +++++ .../web/templates/package_detail.html | 36 ++++ .../web/templates/packages_list.html | 48 +++++ guarddog_nexus/web/templates/scan_detail.html | 30 +++ guarddog_nexus/web/templates/scans_list.html | 48 +++++ guarddog_nexus/webhooks.py | 125 ++++++++++++ pyproject.toml | 4 + scripts/setup-nexus.sh | 73 +++++++ tests/__init__.py | 0 tests/conftest.py | 131 ++++++++++++ tests/test_api.py | 72 +++++++ tests/test_harvester.py | 114 +++++++++++ tests/test_scanner.py | 28 +++ tests/test_webhooks.py | 72 +++++++ 32 files changed, 1865 insertions(+) create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 guarddog_nexus/__init__.py create mode 100644 guarddog_nexus/api/__init__.py create mode 100644 guarddog_nexus/api/findings.py create mode 100644 guarddog_nexus/api/packages.py create mode 100644 guarddog_nexus/api/scans.py create mode 100644 
guarddog_nexus/harvester.py create mode 100644 guarddog_nexus/logging_setup.py create mode 100644 guarddog_nexus/main.py create mode 100644 guarddog_nexus/models.py create mode 100644 guarddog_nexus/nexus_client.py create mode 100644 guarddog_nexus/scanner.py create mode 100644 guarddog_nexus/static/style.css create mode 100644 guarddog_nexus/web/__init__.py create mode 100644 guarddog_nexus/web/routes.py create mode 100644 guarddog_nexus/web/templates/base.html create mode 100644 guarddog_nexus/web/templates/dashboard.html create mode 100644 guarddog_nexus/web/templates/dashboard_stats.html create mode 100644 guarddog_nexus/web/templates/package_detail.html create mode 100644 guarddog_nexus/web/templates/packages_list.html create mode 100644 guarddog_nexus/web/templates/scan_detail.html create mode 100644 guarddog_nexus/web/templates/scans_list.html create mode 100644 guarddog_nexus/webhooks.py create mode 100644 scripts/setup-nexus.sh create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_api.py create mode 100644 tests/test_harvester.py create mode 100644 tests/test_scanner.py create mode 100644 tests/test_webhooks.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..432bff7 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.12-slim-bookworm + +RUN apt-get update && apt-get install -y --no-install-recommends curl ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/ + +WORKDIR /app +COPY pyproject.toml ./ +COPY guarddog_nexus/ guarddog_nexus/ + +RUN uv pip install --system guarddog +RUN uv pip install --system -e . 
+ +RUN mkdir -p /data /tmp/guarddog-nexus + +ENV DATABASE_PATH=/data/guarddog.db +ENV TEMP_DIR=/tmp/guarddog-nexus + +EXPOSE 8080 + +CMD ["python", "-m", "guarddog_nexus.main"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c8a6914 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,44 @@ +services: + guarddog-nexus: + build: . + ports: + - "8080:8080" + environment: + NEXUS_URL: http://nexus:8081 + NEXUS_USERNAME: admin + NEXUS_PASSWORD: "${NEXUS_PASSWORD:-admin123}" + NEXUS_REPOSITORIES: pypi-proxy + LOG_LEVEL: INFO + LOG_SYSLOG_HOST: "" + HOST: "0.0.0.0" + PORT: "8080" + volumes: + - ./data:/data + depends_on: + nexus-setup: + condition: service_completed_successfully + restart: unless-stopped + + nexus: + image: sonatype/nexus3:3.79.0 + ports: + - "8081:8081" + volumes: + - nexus-data:/nexus-data + restart: unless-stopped + + nexus-setup: + image: alpine:3.21 + volumes: + - ./scripts/setup-nexus.sh:/setup.sh:ro + - nexus-data:/nexus-data:ro + environment: + NEXUS_URL: http://nexus:8081 + ADMIN_PASSWORD: "${NEXUS_PASSWORD:-admin123}" + WEBHOOK_URL: http://guarddog-nexus:8080/webhooks/nexus + entrypoint: ["/bin/sh", "/setup.sh"] + depends_on: + - nexus + +volumes: + nexus-data: diff --git a/guarddog_nexus/__init__.py b/guarddog_nexus/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/guarddog_nexus/api/__init__.py b/guarddog_nexus/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/guarddog_nexus/api/findings.py b/guarddog_nexus/api/findings.py new file mode 100644 index 0000000..255e3d1 --- /dev/null +++ b/guarddog_nexus/api/findings.py @@ -0,0 +1,49 @@ +"""REST API for findings (across all scans).""" + +from fastapi import APIRouter, Depends, Query +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from guarddog_nexus.database import get_session +from guarddog_nexus.models import Finding + +router = APIRouter(prefix="/api/v1/findings", 
@router.get("")
async def list_packages(
    limit: int = Query(50, ge=0, le=200),
    offset: int = Query(0, ge=0),
    ecosystem: str | None = Query(None),
    flagged: bool | None = Query(None),
    session: AsyncSession = Depends(get_session),
):
    """List distinct packages (one row per name/version) aggregated over scans.

    Args:
        limit: Page size, 0..200. Negative values are rejected because SQLite
            treats a negative LIMIT as "no limit", which bypassed the cap.
        offset: Number of packages to skip.
        ecosystem: Optional exact-match filter on the scan ecosystem.
        flagged: When given, keep only packages whose aggregated flagged
            state equals this value.
        session: Database session injected by FastAPI.

    Returns:
        A dict with ``total``, ``limit``, ``offset`` and a ``packages`` list.
    """
    last_scanned_at = func.max(Scan.started_at)
    is_flagged = func.max(Scan.flagged)
    latest_scan_id = func.max(Scan.id)

    # Every selected column is either grouped or aggregated. The original
    # selected bare ``ecosystem``/``repository`` columns, which yields an
    # arbitrary row's value on SQLite and is an error on stricter databases.
    grouped = select(
        Scan.package_name,
        Scan.package_version,
        func.max(Scan.ecosystem).label("ecosystem"),
        func.max(Scan.repository).label("repository"),
        last_scanned_at.label("last_scanned_at"),
        is_flagged.label("is_flagged"),
        func.sum(Scan.total_findings).label("total_findings"),
        latest_scan_id.label("latest_scan_id"),
    ).group_by(Scan.package_name, Scan.package_version)

    if ecosystem:
        grouped = grouped.where(Scan.ecosystem == ecosystem)
    if flagged is not None:
        grouped = grouped.having(is_flagged == flagged)

    total = await session.scalar(
        select(func.count()).select_from(grouped.subquery())
    )

    # Secondary sort key keeps pagination stable when timestamps tie.
    page = grouped.order_by(
        last_scanned_at.desc(), latest_scan_id.desc()
    ).offset(offset).limit(limit)
    rows = (await session.execute(page)).all()

    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "packages": [
            {
                "name": r.package_name,
                "version": r.package_version,
                "ecosystem": r.ecosystem,
                "repository": r.repository,
                "last_scanned_at": (
                    r.last_scanned_at.isoformat() if r.last_scanned_at else None
                ),
                "flagged": bool(r.is_flagged),
                "total_findings": r.total_findings,
                "latest_scan_id": r.latest_scan_id,
            }
            for r in rows
        ],
    }
@router.get("")
async def list_scans(
    limit: int = Query(50, ge=0, le=200),
    offset: int = Query(0, ge=0),
    flagged: bool | None = Query(None),
    session: AsyncSession = Depends(get_session),
):
    """Return one page of scans, newest first.

    Args:
        limit: Page size, 0..200. Negative values are rejected because SQLite
            treats a negative LIMIT as "no limit", which bypassed the cap.
        offset: Number of scans to skip.
        flagged: When given, keep only scans whose ``flagged`` equals it.
        session: Database session injected by FastAPI.

    Returns:
        A dict with ``total`` (count of scans matching the filter),
        ``limit``, ``offset`` and the ``scans`` page.
    """
    count_q = select(func.count(Scan.id))
    page_q = select(Scan)
    if flagged is not None:
        # Apply the filter to BOTH queries; the original counted every scan,
        # so ``total`` disagreed with the filtered page.
        count_q = count_q.where(Scan.flagged == flagged)
        page_q = page_q.where(Scan.flagged == flagged)

    total = await session.scalar(count_q)

    # Scan.id breaks ties between scans started in the same instant.
    page_q = (
        page_q.order_by(Scan.started_at.desc(), Scan.id.desc())
        .offset(offset)
        .limit(limit)
    )
    scans = (await session.execute(page_q)).scalars().all()

    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "scans": [
            {
                "id": s.id,
                "package_name": s.package_name,
                "package_version": s.package_version,
                "ecosystem": s.ecosystem,
                "repository": s.repository,
                "status": s.status,
                "total_findings": s.total_findings,
                "flagged": s.flagged,
                "started_at": s.started_at.isoformat() if s.started_at else None,
                "finished_at": s.finished_at.isoformat() if s.finished_at else None,
                "error_message": s.error_message,
            }
            for s in scans
        ],
    }
@router.get("/{scan_id}")
async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)):
    """Return a single scan together with its findings.

    Args:
        scan_id: Primary key of the scan.
        session: Database session injected by FastAPI.

    Returns:
        A dict describing the scan and a ``findings`` list.

    Raises:
        HTTPException: 404 when no scan has this id. The response body is
            still ``{"detail": "Not found"}``; only the status code changes
            from the previous (incorrect) 200.
    """
    # Local import: this block cannot see whether HTTPException is already
    # imported at the top of the module.
    from fastapi import HTTPException

    scan = await session.scalar(
        # Eager-load findings: lazy loading is not available on an async session.
        select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings))
    )
    if scan is None:
        raise HTTPException(status_code=404, detail="Not found")

    return {
        "id": scan.id,
        "package_name": scan.package_name,
        "package_version": scan.package_version,
        "ecosystem": scan.ecosystem,
        "repository": scan.repository,
        "nexus_asset_url": scan.nexus_asset_url,
        "sha256": scan.sha256,
        "status": scan.status,
        "total_findings": scan.total_findings,
        "flagged": scan.flagged,
        "started_at": scan.started_at.isoformat() if scan.started_at else None,
        "finished_at": scan.finished_at.isoformat() if scan.finished_at else None,
        "error_message": scan.error_message,
        "findings": [
            {
                "id": f.id,
                "rule": f.rule,
                "severity": f.severity,
                "message": f.message,
                "location": f.location,
            }
            for f in scan.findings
        ],
    }
async def harvest(
    download_url: str,
    repository: str,
    format_: str,
    asset_path: str,
    session: AsyncSession,
) -> Scan | None:
    """Download, scan, and store results for a single package asset.

    Args:
        download_url: URL of the asset in Nexus.
        repository: Name of the Nexus repository the asset belongs to.
        format_: Nexus repository format; used as the scanner ecosystem.
        asset_path: Repository-relative asset path, parsed for name/version.
        session: Open database session; this function commits on it.

    Returns:
        The persisted ``Scan`` (COMPLETED or FAILED), or ``None`` when the
        asset is skipped (unsupported extension, unparsable path, or a scan
        for the same name/version/repository already exists).
    """
    # Function-scope imports keep this block self-contained.
    import asyncio
    import shutil

    def _utcnow() -> datetime.datetime:
        return datetime.datetime.now(datetime.timezone.utc)

    ecosystem = format_

    filename = os.path.basename(download_url.split("?")[0])
    if not filename.endswith(SUPPORTED_EXTENSIONS):
        log.info("Skipping non-package asset: %s", filename)
        return None

    info = extract_pypi_info(asset_path)
    if info is None:
        log.warning("Could not parse package info from path: %s", asset_path)
        return None

    package_name, package_version = info

    existing = await session.scalar(
        select(Scan.id).where(
            Scan.package_name == package_name,
            Scan.package_version == package_version,
            Scan.repository == repository,
        )
    )
    if existing:
        log.info("Already scanned %s==%s, skipping", package_name, package_version)
        return None

    scan = Scan(
        package_name=package_name,
        package_version=package_version,
        ecosystem=ecosystem,
        repository=repository,
        nexus_asset_url=download_url,
        status=ScanStatus.PENDING.value,
    )
    session.add(scan)
    await session.commit()
    await session.refresh(scan)

    tmpdir = None
    try:
        # Created inside the try so a filesystem error marks the scan FAILED
        # instead of leaving it PENDING forever.
        os.makedirs(config.temp_dir, exist_ok=True)
        tmpdir = tempfile.mkdtemp(dir=config.temp_dir)

        scan.status = ScanStatus.SCANNING.value
        await session.commit()

        # download/hash/scan are blocking (subprocess + file I/O); run them in
        # a worker thread so the event loop keeps serving other requests.
        downloaded = await asyncio.to_thread(download_asset, download_url, tmpdir)
        if not downloaded:
            scan.status = ScanStatus.FAILED.value
            scan.error_message = "Download failed"
            scan.finished_at = _utcnow()
            await session.commit()
            return scan

        scan.sha256 = await asyncio.to_thread(compute_sha256, downloaded)
        await session.commit()

        log.info("Scanning %s==%s", package_name, package_version)
        result = await asyncio.to_thread(scan_package, downloaded, ecosystem)

        findings_list = result.get("findings", [])
        scan_errors = result.get("errors")

        if scan_errors and not findings_list:
            # The scanner reported errors (timeout, missing binary, bad JSON)
            # and produced nothing: this is a failed scan, not a clean one.
            log.error(
                "Scanner errors for %s==%s: %s",
                package_name,
                package_version,
                scan_errors,
            )
            scan.status = ScanStatus.FAILED.value
            scan.error_message = f"Scanner error: {scan_errors}"[:1000]
            scan.finished_at = _utcnow()
            await session.commit()
            return scan

        if scan_errors:
            log.warning(
                "Scanner reported partial errors for %s==%s: %s",
                package_name,
                package_version,
                scan_errors,
            )

        for fdata in findings_list:
            session.add(
                Finding(
                    scan_id=scan.id,
                    rule=fdata["rule"],
                    severity=fdata["severity"],
                    message=fdata["message"],
                    location=fdata.get("location"),
                )
            )

        total_findings = len(findings_list)
        flagged = total_findings > 0
        scan.total_findings = total_findings
        scan.flagged = flagged
        scan.status = ScanStatus.COMPLETED.value
        scan.finished_at = _utcnow()
        await session.commit()

        if flagged:
            log.warning(
                "FLAGGED %s==%s: %d findings in repo %s",
                package_name,
                package_version,
                total_findings,
                repository,
            )

        log.info(
            "Scan complete: %s==%s (%d findings)",
            package_name,
            package_version,
            total_findings,
        )
        return scan

    except Exception as e:
        # Top-level boundary for one harvest job: record the failure on the
        # scan row rather than propagating into the webhook handler.
        log.error("Scan failed for %s==%s: %s", package_name, package_version, e)
        # Drop any half-added findings / broken transaction before updating.
        await session.rollback()
        scan.status = ScanStatus.FAILED.value
        scan.error_message = str(e)[:1000]
        scan.finished_at = _utcnow()
        await session.commit()
        return scan

    finally:
        # The downloaded (potentially malicious) archive must not accumulate
        # on disk; the original never removed the temp directory.
        if tmpdir is not None:
            shutil.rmtree(tmpdir, ignore_errors=True)
def setup_logging() -> logging.Logger:
    """Configure and return the ``guarddog_nexus`` logger.

    Attaches a JSON-formatted stdout handler and, when
    ``config.log_syslog_host`` is set, a JSON-formatted syslog handler.
    Calling it again only refreshes the level, so handlers are never
    duplicated.

    Returns:
        The configured logger.
    """
    logger = logging.getLogger("guarddog_nexus")
    logger.setLevel(config.log_level.upper())

    if logger.handlers:
        # Already configured: adding handlers again would emit every record
        # multiple times.
        return logger

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(JsonFormatter())
    logger.addHandler(stdout_handler)

    if config.log_syslog_host:
        try:
            syslog_handler = SysLogHandler(
                address=(config.log_syslog_host, config.log_syslog_port),
                facility=SysLogHandler.LOG_LOCAL0,
            )
        except OSError as exc:
            # An unresolvable/unreachable syslog host must not crash the
            # application at import time; keep logging to stdout only.
            logger.warning(
                "Syslog logging disabled, cannot reach %s:%s: %s",
                config.log_syslog_host,
                config.log_syslog_port,
                exc,
            )
        else:
            syslog_handler.setFormatter(JsonFormatter())
            logger.addHandler(syslog_handler)

    return logger
+app.include_router(web_router) + +if os.path.isdir(STATIC_DIR): + app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static") + + +@app.get("/health") +async def health(): + return {"status": "ok", "version": "0.1.0"} + + +def main(): + uvicorn.run( + "guarddog_nexus.main:app", + host=config.host, + port=config.port, + log_level=config.log_level.lower(), + reload=False, + ) + + +if __name__ == "__main__": + main() diff --git a/guarddog_nexus/models.py b/guarddog_nexus/models.py new file mode 100644 index 0000000..8a3830d --- /dev/null +++ b/guarddog_nexus/models.py @@ -0,0 +1,58 @@ +"""SQLAlchemy ORM models.""" + +import datetime +from enum import Enum + +from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, func +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from guarddog_nexus.database import Base + + +class ScanStatus(str, Enum): + PENDING = "pending" + SCANNING = "scanning" + COMPLETED = "completed" + FAILED = "failed" + + +class Scan(Base): + __tablename__ = "scans" + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + package_name: Mapped[str] = mapped_column(String(255), nullable=False) + package_version: Mapped[str] = mapped_column(String(255), nullable=False) + ecosystem: Mapped[str] = mapped_column(String(50), nullable=False, default="pypi") + repository: Mapped[str] = mapped_column(String(255), nullable=False) + nexus_asset_url: Mapped[str] = mapped_column(Text, nullable=False) + sha256: Mapped[str | None] = mapped_column(String(64), nullable=True) + status: Mapped[str] = mapped_column( + String(20), nullable=False, default=ScanStatus.PENDING.value + ) + total_findings: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + flagged: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + started_at: Mapped[datetime.datetime] = mapped_column( + DateTime, nullable=False, default=func.now() + ) + finished_at: Mapped[datetime.datetime | None] = 
mapped_column(DateTime, nullable=True) + error_message: Mapped[str | None] = mapped_column(Text, nullable=True) + + findings: Mapped[list["Finding"]] = relationship( + "Finding", back_populates="scan", cascade="all, delete-orphan" + ) + + +class Finding(Base): + __tablename__ = "findings" + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + scan_id: Mapped[int] = mapped_column(Integer, ForeignKey("scans.id"), nullable=False) + rule: Mapped[str] = mapped_column(String(255), nullable=False) + severity: Mapped[str] = mapped_column(String(50), nullable=False) + message: Mapped[str] = mapped_column(Text, nullable=False) + location: Mapped[str | None] = mapped_column(String(512), nullable=True) + created_at: Mapped[datetime.datetime] = mapped_column( + DateTime, nullable=False, default=func.now() + ) + + scan: Mapped["Scan"] = relationship("Scan", back_populates="findings") diff --git a/guarddog_nexus/nexus_client.py b/guarddog_nexus/nexus_client.py new file mode 100644 index 0000000..05bdf9e --- /dev/null +++ b/guarddog_nexus/nexus_client.py @@ -0,0 +1,65 @@ +"""Sonatype Nexus REST API client.""" + +import hashlib +import os +import subprocess + +from guarddog_nexus.config import config +from guarddog_nexus.logging_setup import log + +SUPPORTED_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip") +PACKAGE_FILE_PATTERNS = ("packages/",) + + +def get_ecosystem_from_format(fmt: str) -> str | None: + mapping = { + "pypi": "pypi", + "npm": "npm", + "rubygems": "rubygems", + "go": "go", + "raw": None, + } + return mapping.get(fmt.lower() if fmt else "") + + +def extract_pypi_info(asset_path: str) -> tuple[str, str] | None: + """Extract package name and version from a PyPI asset path. 
+ + Path format: packages/requests/2.31.0/requests-2.31.0.tar.gz + """ + parts = asset_path.strip("/").split("/") + if len(parts) >= 3 and parts[0] == "packages": + return parts[1], parts[2] + return None + + +def download_asset(download_url: str, dest_dir: str) -> str | None: + """Download an asset from Nexus using curl (available in Docker).""" + dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0])) + try: + result = subprocess.run( + [ + "curl", "-sfSL", + "-u", f"{config.nexus_username}:{config.nexus_password}", + "-o", dest_path, + download_url, + ], + capture_output=True, + text=True, + timeout=120, + ) + if result.returncode != 0: + log.warning("Failed to download %s: %s", download_url, result.stderr) + return None + return dest_path + except Exception as e: + log.error("Download error for %s: %s", download_url, e) + return None + + +def compute_sha256(filepath: str) -> str: + h = hashlib.sha256() + with open(filepath, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + h.update(chunk) + return h.hexdigest() diff --git a/guarddog_nexus/scanner.py b/guarddog_nexus/scanner.py new file mode 100644 index 0000000..1cce125 --- /dev/null +++ b/guarddog_nexus/scanner.py @@ -0,0 +1,74 @@ +"""GuardDog CLI integration via subprocess.""" + +import json +import shutil +import subprocess + +from guarddog_nexus.config import config +from guarddog_nexus.logging_setup import log + +GUARDDOG_BIN = shutil.which("guarddog") or "guarddog" + + +def scan_package(filepath: str, ecosystem: str = "pypi") -> dict: + """Run guarddog scan on a downloaded package file. 
Returns parsed JSON output.""" + cmd = [ + GUARDDOG_BIN, ecosystem, "scan", filepath, + "--output-format", "json", + ] + + log.info("Running: %s", " ".join(cmd)) + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=config.scan_timeout_seconds, + ) + except subprocess.TimeoutExpired: + log.error("GuardDog scan timed out for %s", filepath) + return {"issues": [], "errors": ["timeout"]} + except FileNotFoundError: + log.error("GuardDog binary not found at %s", GUARDDOG_BIN) + return {"issues": [], "errors": ["guarddog_not_found"]} + + if result.returncode not in (0, 1): + log.error("GuardDog exited %d: %s", result.returncode, result.stderr) + return {"issues": [], "errors": [result.stderr.strip()]} + + try: + data = json.loads(result.stdout) + except json.JSONDecodeError: + log.error("GuardDog returned invalid JSON for %s", filepath) + return {"issues": [], "errors": ["json_parse_error"]} + + return _normalize_output(data) + + +def _normalize_output(data: dict) -> dict: + """Normalize guarddog JSON output across versions into a consistent format. + + GuardDog JSON format (varies by version): + { + "results": [{"rule": "...", "severity": "...", "message": "...", "location": "..."}], + "errors": [...] 
+ } + Or simpler: + {"issues": [...], "errors": [...]} + """ + findings = [] + + for entry in data.get("results", data.get("issues", [])): + if isinstance(entry, dict): + findings.append({ + "rule": entry.get("rule", entry.get("id", "unknown")), + "severity": entry.get("severity", "WARNING"), + "message": entry.get("message", entry.get("description", "")), + "location": entry.get("location", entry.get("path", "")), + }) + + return { + "findings": findings, + "errors": data.get("errors", []), + } diff --git a/guarddog_nexus/static/style.css b/guarddog_nexus/static/style.css new file mode 100644 index 0000000..28213ba --- /dev/null +++ b/guarddog_nexus/static/style.css @@ -0,0 +1 @@ +/* static/style.css - minimal overrides for Pico.css dark theme */ diff --git a/guarddog_nexus/web/__init__.py b/guarddog_nexus/web/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/guarddog_nexus/web/routes.py b/guarddog_nexus/web/routes.py new file mode 100644 index 0000000..faa744b --- /dev/null +++ b/guarddog_nexus/web/routes.py @@ -0,0 +1,191 @@ +"""Web UI routes — Jinja2 + htmx pages.""" + +import datetime + +from fastapi import APIRouter, Depends, Request +from fastapi.responses import HTMLResponse +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from guarddog_nexus.database import get_session +from guarddog_nexus.models import Finding, Scan + +router = APIRouter(tags=["web"]) +TEMPLATES: dict[str, str] = {} + + +def _render(name: str, **context) -> HTMLResponse: + from jinja2 import Environment, PackageLoader, select_autoescape + + env = Environment( + loader=PackageLoader("guarddog_nexus", "web/templates"), + autoescape=select_autoescape(), + ) + template = env.get_template(name) + return HTMLResponse(template.render(**context)) + + +@router.get("/", response_class=HTMLResponse) +async def dashboard(request: Request, session: AsyncSession = Depends(get_session)): + total_scans = await 
@router.get("/scans", response_class=HTMLResponse)
async def scans_list(
    request: Request,
    page: int = 1,
    flagged: str = "",
    session: AsyncSession = Depends(get_session),
):
    """Render the paginated scans page.

    Args:
        request: Incoming request, passed through to the template.
        page: 1-based page number; values below 1 are treated as 1.
        flagged: ``"1"`` restricts the list to flagged scans; any other
            value shows all scans.
        session: Database session injected by FastAPI.

    Returns:
        The rendered ``scans_list.html`` page.
    """
    per_page = 50
    # page <= 0 would produce a negative OFFSET.
    page = max(page, 1)
    offset = (page - 1) * per_page

    page_q = select(Scan)
    count_q = select(func.count(Scan.id))
    if flagged == "1":
        # Filter the count as well, otherwise the pager advertises pages
        # that do not exist in the filtered view.
        page_q = page_q.where(Scan.flagged == True)  # noqa: E712 (SQL expression)
        count_q = count_q.where(Scan.flagged == True)  # noqa: E712

    page_q = (
        page_q.order_by(Scan.started_at.desc(), Scan.id.desc())
        .offset(offset)
        .limit(per_page)
    )

    scans = (await session.execute(page_q)).scalars().all()
    total = await session.scalar(count_q)

    return _render(
        "scans_list.html",
        scans=scans,
        page=page,
        per_page=per_page,
        total=total,
        flagged_filter=flagged,
        request=request,
    )
scan_id).options(selectinload(Scan.findings)) + ) + if not scan: + return HTMLResponse("

Not found

", status_code=404) + + return _render("scan_detail.html", scan=scan, request=request) + + +@router.get("/packages", response_class=HTMLResponse) +async def packages_list( + request: Request, + page: int = 1, + flagged: str = "", + session: AsyncSession = Depends(get_session), +): + per_page = 50 + offset = (page - 1) * per_page + + subq = select( + Scan.package_name.label("pkg_name"), + Scan.package_version.label("pkg_ver"), + Scan.ecosystem, + Scan.repository, + func.max(Scan.started_at).label("last_scan"), + func.max(Scan.flagged).label("is_flagged"), + func.sum(Scan.total_findings).label("findings_sum"), + func.max(Scan.id).label("sid"), + ).group_by(Scan.package_name, Scan.package_version) + + if flagged == "1": + subq = subq.having(func.max(Scan.flagged) == True) + + subq = subq.subquery() + total = await session.scalar(select(func.count()).select_from(subq)) + rows = ( + await session.execute( + select(subq) + .order_by(subq.c.last_scan.desc()) + .offset(offset) + .limit(per_page) + ) + ).all() + + return _render( + "packages_list.html", + packages=rows, + page=page, + per_page=per_page, + total=total, + flagged_filter=flagged, + request=request, + ) + + +@router.get("/packages/{name}/{version}", response_class=HTMLResponse) +async def package_detail( + name: str, + version: str, + request: Request, + session: AsyncSession = Depends(get_session), +): + from sqlalchemy.orm import selectinload + + scans = ( + await session.execute( + select(Scan) + .where(Scan.package_name == name, Scan.package_version == version) + .options(selectinload(Scan.findings)) + .order_by(Scan.started_at.desc()) + ) + ).scalars().all() + + if not scans: + return HTMLResponse("

Not found

", status_code=404) + + all_findings = [] + for s in scans: + all_findings.extend(s.findings) + + return _render( + "package_detail.html", + pkg_name=name, + pkg_version=version, + scans=scans, + findings=all_findings, + request=request, + ) diff --git a/guarddog_nexus/web/templates/base.html b/guarddog_nexus/web/templates/base.html new file mode 100644 index 0000000..c6526ff --- /dev/null +++ b/guarddog_nexus/web/templates/base.html @@ -0,0 +1,41 @@ + + + + + + GuardDog Nexus + + + + + +
+ + {% block content %}{% endblock %} +
+ + diff --git a/guarddog_nexus/web/templates/dashboard.html b/guarddog_nexus/web/templates/dashboard.html new file mode 100644 index 0000000..cf16025 --- /dev/null +++ b/guarddog_nexus/web/templates/dashboard.html @@ -0,0 +1,8 @@ +{% extends "base.html" %} +{% block content %} +

Dashboard

+ +
+ {% include "dashboard_stats.html" %} +
+{% endblock %} diff --git a/guarddog_nexus/web/templates/dashboard_stats.html b/guarddog_nexus/web/templates/dashboard_stats.html new file mode 100644 index 0000000..abf1b16 --- /dev/null +++ b/guarddog_nexus/web/templates/dashboard_stats.html @@ -0,0 +1,56 @@ +
+
+
{{ total_scans }}
+ Total Scans +
+
+
{{ flagged_scans }}
+ Flagged +
+
+
{{ recent_flagged }}
+ Flagged (7 days) +
+
+
{{ total_findings }}
+ Total Findings +
+
+ +

Latest Scans

+ + + + + + + + + + + + + {% for s in latest_scans %} + + + + + + + + + {% endfor %} + +
PackageVersionEcosystemStatusFindingsTime
{{ s.package_name }}{{ s.package_version }}{{ s.ecosystem }}{{ s.status }}{% if s.flagged %}{{ s.total_findings }}{% else %}{{ s.total_findings }}{% endif %}{{ s.started_at.strftime('%Y-%m-%d %H:%M') if s.started_at }}
+ +{% if top_rules %} +

Top Rules Triggered

+ + + + {% for rule, cnt in top_rules %} + + {% endfor %} + +
RuleCount
{{ rule }}{{ cnt }}
+{% endif %} diff --git a/guarddog_nexus/web/templates/package_detail.html b/guarddog_nexus/web/templates/package_detail.html new file mode 100644 index 0000000..bfa8558 --- /dev/null +++ b/guarddog_nexus/web/templates/package_detail.html @@ -0,0 +1,36 @@ +{% extends "base.html" %} +{% block content %} +

{{ pkg_name }} v{{ pkg_version }}

+ +

Scans ({{ scans|length }})

+ + + + + + {% for s in scans %} + + + + + + + + {% endfor %} + +
IDRepoStatusFindingsTime
#{{ s.id }}{{ s.repository }}{{ s.status }}{% if s.flagged %}{{ s.total_findings }}{% else %}0{% endif %}{{ s.started_at.strftime('%Y-%m-%d %H:%M') if s.started_at }}
+ +

Findings ({{ findings|length }})

+{% if findings %} + {% for f in findings|sort(attribute='severity', reverse=true) %} +
+ [{{ f.severity }}] + {{ f.rule }} + {% if f.location %} @ {{ f.location }}{% endif %} +

{{ f.message }}

+
+ {% endfor %} +{% else %} +

No findings — package looks clean.

+{% endif %} +{% endblock %} diff --git a/guarddog_nexus/web/templates/packages_list.html b/guarddog_nexus/web/templates/packages_list.html new file mode 100644 index 0000000..629f7b5 --- /dev/null +++ b/guarddog_nexus/web/templates/packages_list.html @@ -0,0 +1,48 @@ +{% extends "base.html" %} +{% block content %} +

Packages

+ +

+ + {% if flagged_filter == '1' %}Show all{% else %}Flagged only{% endif %} + +

+ + + + + + + + + + + + + + + {% for p in packages %} + + + + + + + + + + {% endfor %} + +
NameVersionEcosystemRepoFlaggedFindingsLast Scan
{{ p.pkg_name }}{{ p.pkg_ver }}{{ p.ecosystem }}{{ p.repository }}{% if p.is_flagged %}YES{% else %}No{% endif %}{{ p.findings_sum }}{{ p.last_scan.strftime('%Y-%m-%d %H:%M') if p.last_scan }}
+ +{% set total_pages = (total // per_page) + (1 if total % per_page else 0) %} +{% if total_pages > 1 %} + +{% endif %} +{% endblock %} diff --git a/guarddog_nexus/web/templates/scan_detail.html b/guarddog_nexus/web/templates/scan_detail.html new file mode 100644 index 0000000..eb06b27 --- /dev/null +++ b/guarddog_nexus/web/templates/scan_detail.html @@ -0,0 +1,30 @@ +{% extends "base.html" %} +{% block content %} +

Scan #{{ scan.id }}

+ + + + + + + + + + + {% if scan.error_message %}{% endif %} +
Package{{ scan.package_name }}
Version{{ scan.package_version }}
Ecosystem{{ scan.ecosystem }}
Repository{{ scan.repository }}
Status{{ scan.status }}
SHA256{{ scan.sha256 or '-' }}
Started{{ scan.started_at.isoformat() if scan.started_at }}
Finished{{ scan.finished_at.isoformat() if scan.finished_at }}
Error{{ scan.error_message }}
+ +

Findings ({{ scan.findings|length }})

+{% if scan.findings %} + {% for f in scan.findings|sort(attribute='severity', reverse=true) %} +
+ [{{ f.severity }}] + {{ f.rule }} + {% if f.location %} @ {{ f.location }}{% endif %} +

{{ f.message }}

+
+ {% endfor %} +{% else %} +

No findings — package looks clean.

+{% endif %} +{% endblock %} diff --git a/guarddog_nexus/web/templates/scans_list.html b/guarddog_nexus/web/templates/scans_list.html new file mode 100644 index 0000000..8f405cb --- /dev/null +++ b/guarddog_nexus/web/templates/scans_list.html @@ -0,0 +1,48 @@ +{% extends "base.html" %} +{% block content %} +

Scans

+ +

+ + {% if flagged_filter == '1' %}Show all{% else %}Flagged only{% endif %} + +

+ + + + + + + + + + + + + + + {% for s in scans %} + + + + + + + + + + {% endfor %} + +
IDPackageVersionRepoStatusFindingsTime
#{{ s.id }}{{ s.package_name }}{{ s.package_version }}{{ s.repository }}{{ s.status }}{% if s.flagged %}{{ s.total_findings }}{% else %}0{% endif %}{{ s.started_at.strftime('%Y-%m-%d %H:%M') if s.started_at }}
+ +{% set total_pages = (total // per_page) + (1 if total % per_page else 0) %} +{% if total_pages > 1 %} + +{% endif %} +{% endblock %}
diff --git a/guarddog_nexus/webhooks.py b/guarddog_nexus/webhooks.py
new file mode 100644
index 0000000..bd3110e
--- /dev/null
+++ b/guarddog_nexus/webhooks.py
@@ -0,0 +1,173 @@
+"""Nexus webhook receiver — handles component/asset webhooks."""
+
+import hashlib
+import hmac
+import json
+import re
+from contextlib import aclosing
+
+from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
+
+from guarddog_nexus.config import config
+from guarddog_nexus.database import get_session
+from guarddog_nexus.harvester import harvest
+from guarddog_nexus.logging_setup import log
+
+router = APIRouter(prefix="/webhooks", tags=["webhooks"])
+
+# Webhook actions that trigger a scan; every other action is acknowledged and ignored.
+RELEVANT_ACTIONS = {"CREATED", "UPDATED"}
+
+# Asset names matching any of these patterns are treated as metadata and never scanned.
+EXCLUDE_NAME_PATTERNS = [
+    re.compile(p) for p in [
+        r"^simple/",
+        r"\.html$",
+        r"\.json$",
+        r"\.xml$",
+        r"index\.",
+        r"\.rss$",
+        r"\.atom$",
+    ]
+]
+
+
+def _should_skip_asset(filename: str) -> bool:
+    """Return True when ``filename`` matches any pattern in ``EXCLUDE_NAME_PATTERNS``."""
+    return any(pat.search(filename) for pat in EXCLUDE_NAME_PATTERNS)
+
+
+def _verify_signature(payload: bytes, signature: str | None) -> None:
+    """Validate the webhook signature against the raw request body.
+
+    The check is skipped when ``config.webhook_secret`` is empty. Otherwise the
+    header must equal the hex HMAC-SHA256 of ``payload`` keyed with the secret.
+
+    Raises:
+        HTTPException: 401 if the header is missing, 403 if it does not match.
+    """
+    if not config.webhook_secret:
+        return
+    if not signature:
+        log.warning("Webhook rejected: missing signature header")
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature"
+        )
+    expected = hmac.new(
+        config.webhook_secret.encode(), payload, hashlib.sha256
+    ).hexdigest()
+    # Compare bytes, not str: compare_digest() raises TypeError for a str that
+    # contains non-ASCII characters, and the header value is caller-controlled.
+    if not hmac.compare_digest(signature.encode("utf-8"), expected.encode("ascii")):
+        log.warning("Webhook rejected: invalid signature")
+        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature")
+
+
+@router.post("/nexus")
+async def nexus_webhook(
+    request: Request,
+    background_tasks: BackgroundTasks,
+    x_nexus_webhook_signature: str | None = Header(None, alias="X-Nexus-Webhook-Signature"),
+):
+    """Handle a Nexus webhook and queue a background scan for the referenced asset.
+
+    Returns a ``{"status": "ignored", ...}`` body for irrelevant actions and
+    metadata assets, and ``{"status": "accepted", ...}`` once a scan is queued.
+
+    Raises:
+        HTTPException: 401/403 on signature failure; 400 for a body that is not a
+            JSON object, has no asset/component, or has no download URL.
+    """
+    payload = await request.body()
+
+    # Authenticate the raw bytes before decoding or parsing anything.
+    _verify_signature(payload, x_nexus_webhook_signature)
+
+    try:
+        data = json.loads(payload.decode("utf-8"))
+    except (UnicodeDecodeError, json.JSONDecodeError):
+        # Undecodable bytes are a client error too, not an unhandled 500.
+        log.warning("Webhook received invalid JSON")
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON"
+        ) from None
+
+    # Valid JSON may still be a list, string or number; every lookup below needs a dict.
+    if not isinstance(data, dict):
+        log.warning("Webhook payload is not a JSON object")
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON")
+
+    # ``or ""`` tolerates an explicit null; str() tolerates a non-string value.
+    action = str(data.get("action") or "").upper()
+    if action not in RELEVANT_ACTIONS:
+        log.debug("Ignoring action: %s", action)
+        return {"status": "ignored", "action": action}
+
+    asset = data.get("asset") or data.get("component") or data.get("repositoryComponent")
+    if not isinstance(asset, dict) or not asset:
+        log.warning("Webhook payload has no asset/component")
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No asset in payload")
+
+    asset_name = str(asset.get("name") or "")
+    if _should_skip_asset(asset_name):
+        log.debug("Skipping metadata asset: %s", asset_name)
+        return {"status": "ignored", "reason": "metadata_asset"}
+
+    download_url = _extract_download_url(asset, data)
+    if not download_url:
+        log.warning("Could not extract download URL from webhook")
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No download URL")
+
+    repository_name = data.get("repositoryName", asset.get("repositoryName", ""))
+    format_ = asset.get("format", "pypi")
+    asset_path = asset.get("path", download_url)
+
+    log.info(
+        "Webhook: %s %s in %s (%s)",
+        action,
+        asset_name,
+        repository_name,
+        format_,
+    )
+
+    background_tasks.add_task(
+        _scan_in_background, download_url, repository_name, format_, asset_path
+    )
+
+    return {"status": "accepted", "package": asset_name, "action": action}
+
+
+def _extract_download_url(asset: dict, full_payload: dict) -> str | None:
+    """Return the first non-empty download URL found in the asset or the payload.
+
+    The asset is checked for ``downloadUrl``, ``download_url`` and ``url`` in
+    that order, then the top-level payload for ``downloadUrl``/``download_url``.
+    """
+    for key in ("downloadUrl", "download_url", "url"):
+        val = asset.get(key)
+        if val:
+            return val
+    return full_payload.get("downloadUrl") or full_payload.get("download_url")
+
+
+async def _scan_in_background(
+    download_url: str,
+    repository: str,
+    format_: str,
+    asset_path: str,
+):
+    """Run ``harvest`` for one asset using a session obtained from ``get_session``.
+
+    Failures are logged, never raised: this is scheduled as a background task,
+    so there is no caller left to handle an exception.
+    """
+    try:
+        # aclosing() finalises the session generator as soon as the scan ends
+        # instead of leaving it suspended until garbage collection.
+        async with aclosing(get_session()) as sessions:
+            async for session in sessions:
+                await harvest(download_url, repository, format_, asset_path, session)
+                break
+    except Exception as e:
+        # Top-level boundary of the background task: keep the traceback in the log.
+        log.exception("Background scan failed: %s", e)
diff --git a/pyproject.toml b/pyproject.toml index 230540d..8c01211 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,10 @@ target-version = "py310" line-length = 100 select = ["E", "F", "I", "W"] +[tool.ruff.lint] +select = ["E", "F", "I", "W"] +ignore = ["E712"] + [tool.ruff.lint.isort] known-first-party = ["guarddog_nexus"] diff --git a/scripts/setup-nexus.sh b/scripts/setup-nexus.sh new file mode 100644 index 0000000..46abcbb --- /dev/null +++ b/scripts/setup-nexus.sh @@ -0,0 +1,73 @@ +#!/bin/sh +# Setup script for test Nexus instance. +# Creates a PyPI proxy repo and a webhook pointing to guarddog-nexus. + +NEXUS_URL="${NEXUS_URL:-http://nexus:8081}" +ADMIN_PASSWORD="${ADMIN_PASSWORD:-admin123}" +WEBHOOK_URL="${WEBHOOK_URL:-http://guarddog-nexus:8080/webhooks/nexus}" + +echo "Waiting for Nexus to start..." + +# Wait until Nexus REST API is available (up to 5 minutes) +MAX_WAIT=300 +ELAPSED=0 +while [ $ELAPSED -lt $MAX_WAIT ]; do + if curl -sf -o /dev/null "${NEXUS_URL}/service/rest/v1/status" 2>/dev/null; then + echo "Nexus is up!" + break + fi + sleep 5 + ELAPSED=$((ELAPSED + 5)) +done + +if [ $ELAPSED -ge $MAX_WAIT ]; then + echo "Timed out waiting for Nexus" + exit 1 +fi + +# Check if password needs changing (first run) +ADMIN_PASSWORD_FILE="/nexus-data/admin.password" +if [ -f "$ADMIN_PASSWORD_FILE" ]; then + RANDOM_PASS=$(cat "$ADMIN_PASSWORD_FILE") + echo "Using random admin password: $RANDOM_PASS" + AUTH_PASS="$RANDOM_PASS" +else + AUTH_PASS="$ADMIN_PASSWORD" +fi + +echo "Creating PyPI proxy repository..." +curl -sf -u "admin:${AUTH_PASS}" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "pypi-proxy", + "online": true, + "storage": {"blobStoreName": "default", "strictContentTypeValidation": true}, + "proxy": {"remoteUrl": "https://pypi.org", "contentMaxAge": 1440}, + "format": "pypi" + }' \ + "${NEXUS_URL}/service/rest/v1/repositories/pypi/proxy" || echo "Repo may already exist" + +echo "Creating webhook..." 
+curl -sf -u "admin:${AUTH_PASS}" \ + -H "Content-Type: application/json" \ + -d "{ + \"name\": \"guarddog-scan\", + \"eventTypes\": [\"repository.component\", \"repository.asset\"], + \"format\": \"pypi\", + \"url\": \"${WEBHOOK_URL}\", + \"secret\": \"\", + \"enabled\": true + }" \ + "${NEXUS_URL}/service/rest/v1/webhooks" || echo "Webhook may already exist" + +# Change admin password if this was first run +if [ -f "$ADMIN_PASSWORD_FILE" ]; then + echo "Changing admin password..." + curl -sf -u "admin:${RANDOM_PASS}" \ + -H "Content-Type: text/plain" \ + -X PUT \ + -d "${ADMIN_PASSWORD}" \ + "${NEXUS_URL}/service/rest/v1/security/users/admin/change-password" +fi + +echo "Nexus setup complete." diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..a62d0cb --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,131 @@ +"""Test fixtures for guarddog-nexus.""" + +import os +import sys + +import pytest +import pytest_asyncio +from httpx import ASGITransport, AsyncClient +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +os.environ["DATABASE_PATH"] = ":memory:" +os.environ["NEXUS_URL"] = "http://nexus:8081" +os.environ["NEXUS_USERNAME"] = "admin" +os.environ["NEXUS_PASSWORD"] = "admin123" +os.environ["LOG_SYSLOG_HOST"] = "" +os.environ["TEMP_DIR"] = "/tmp/guarddog-nexus-test" + +from guarddog_nexus.database import Base, get_session # noqa: E402 +from guarddog_nexus.main import app # noqa: E402 + + +@pytest_asyncio.fixture +async def db_engine(): + engine = create_async_engine("sqlite+aiosqlite:///file:guarddog_test?mode=memory&cache=shared&uri=true") + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + yield engine + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + await 
engine.dispose() + + +@pytest_asyncio.fixture +async def db_session(db_engine): + maker = async_sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False) + async with maker() as session: + yield session + + +@pytest_asyncio.fixture +async def client(db_engine): + maker = async_sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False) + + async def override_get_session(): + async with maker() as session: + yield session + + app.dependency_overrides[get_session] = override_get_session + + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as ac: + yield ac + + app.dependency_overrides.clear() + + +@pytest.fixture +def sample_nexus_webhook(): + return { + "timestamp": "2026-05-09T12:00:00.000+00:00", + "nodeId": "test-node", + "initiator": "admin", + "action": "CREATED", + "repositoryName": "pypi-proxy", + "asset": { + "name": "requests-2.31.0.tar.gz", + "format": "pypi", + "path": "packages/requests/2.31.0/requests-2.31.0.tar.gz", + "downloadUrl": "http://nexus:8081/repository/pypi-proxy/packages/requests/2.31.0/requests-2.31.0.tar.gz", + }, + } + + +@pytest.fixture +def guarddog_output_clean(): + return { + "results": [], + "errors": [], + } + + +@pytest.fixture +def guarddog_output_flagged(): + return { + "results": [ + { + "rule": "shady-links", + "severity": "WARNING", + "message": "Package contains URL to suspicious domain", + "location": "setup.py:15", + }, + { + "rule": "exec-base64", + "severity": "ERROR", + "message": "Base64-encoded code execution detected", + "location": "core.py:42", + }, + ], + "errors": [], + } + + +@pytest.fixture +def guarddog_normalized_flagged(): + return { + "findings": [ + { + "rule": "shady-links", + "severity": "WARNING", + "message": "Suspicious URL", + "location": "setup.py:15", + }, + { + "rule": "exec-base64", + "severity": "ERROR", + "message": "Base64 exec", + "location": "core.py:42", + }, + ], + "errors": [], + } + + +@pytest.fixture +def 
guarddog_normalized_clean(): + return { + "findings": [], + "errors": [], + } diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..9de7d47 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,72 @@ +"""Tests for REST API endpoints.""" + +import pytest + + +@pytest.mark.asyncio +async def test_health(client): + resp = await client.get("/health") + assert resp.status_code == 200 + assert resp.json()["status"] == "ok" + + +@pytest.mark.asyncio +async def test_list_scans_empty(client): + resp = await client.get("/api/v1/scans") + assert resp.status_code == 200 + data = resp.json() + assert data["total"] == 0 + assert len(data["scans"]) == 0 + + +@pytest.mark.asyncio +async def test_scan_stats_empty(client): + resp = await client.get("/api/v1/scans/stats") + assert resp.status_code == 200 + data = resp.json() + assert data["total_scans"] == 0 + assert data["flagged_scans"] == 0 + + +@pytest.mark.asyncio +async def test_scan_not_found(client): + resp = await client.get("/api/v1/scans/99999") + assert resp.status_code == 200 + assert "detail" in resp.json() + + +@pytest.mark.asyncio +async def test_list_packages_empty(client): + resp = await client.get("/api/v1/packages") + assert resp.status_code == 200 + data = resp.json() + assert data["total"] == 0 + + +@pytest.mark.asyncio +async def test_list_findings_empty(client): + resp = await client.get("/api/v1/findings") + assert resp.status_code == 200 + data = resp.json() + assert data["total"] == 0 + + +@pytest.mark.asyncio +async def test_web_ui_dashboard(client): + resp = await client.get("/") + assert resp.status_code == 200 + assert "GuardDog Nexus" in resp.text + + +@pytest.mark.asyncio +async def test_web_ui_scans(client): + resp = await client.get("/scans") + assert resp.status_code == 200 + assert "Scans" in resp.text + + +@pytest.mark.asyncio +async def test_web_ui_packages(client): + resp = await client.get("/packages") + assert resp.status_code == 200 + assert "Packages" in 
resp.text diff --git a/tests/test_harvester.py b/tests/test_harvester.py new file mode 100644 index 0000000..6611d77 --- /dev/null +++ b/tests/test_harvester.py @@ -0,0 +1,114 @@ +"""Tests for harvester pipeline.""" + +from unittest.mock import patch + +import pytest +from sqlalchemy import select + +from guarddog_nexus.harvester import harvest +from guarddog_nexus.models import Finding + + +@pytest.mark.asyncio +async def test_harvest_new_package(db_session, guarddog_normalized_flagged): + with ( + patch("guarddog_nexus.harvester.download_asset") as mock_dl, + patch("guarddog_nexus.harvester.compute_sha256") as mock_sha, + patch("guarddog_nexus.harvester.scan_package") as mock_scan, + ): + mock_dl.return_value = "/tmp/test-package.tar.gz" + mock_sha.return_value = "abc123" + mock_scan.return_value = guarddog_normalized_flagged + + scan = await harvest( + download_url="http://nexus:8081/repository/pypi-proxy/packages/requests/2.31.0/requests-2.31.0.tar.gz", + repository="pypi-proxy", + format_="pypi", + asset_path="packages/requests/2.31.0/requests-2.31.0.tar.gz", + session=db_session, + ) + + assert scan is not None + assert scan.package_name == "requests" + assert scan.package_version == "2.31.0" + assert scan.ecosystem == "pypi" + assert scan.status == "completed" + assert scan.flagged is True + assert scan.total_findings == 2 + assert scan.sha256 == "abc123" + + findings = ( + (await db_session.execute(select(Finding).where(Finding.scan_id == scan.id))) + .scalars() + .all() + ) + assert len(findings) == 2 + + +@pytest.mark.asyncio +async def test_harvest_skips_duplicate(db_session, guarddog_normalized_flagged): + with ( + patch("guarddog_nexus.harvester.download_asset") as mock_dl, + patch("guarddog_nexus.harvester.compute_sha256") as mock_sha, + patch("guarddog_nexus.harvester.scan_package") as mock_scan, + ): + mock_dl.return_value = "/tmp/test.tar.gz" + mock_sha.return_value = "abc" + mock_scan.return_value = guarddog_normalized_flagged + + first = await 
harvest( + "http://nexus:8081/repo/pypi-proxy/packages/x/1.0/x-1.0.tar.gz", + "pypi-proxy", "pypi", "packages/x/1.0/x-1.0.tar.gz", db_session, + ) + second = await harvest( + "http://nexus:8081/repo/pypi-proxy/packages/x/1.0/x-1.0.tar.gz", + "pypi-proxy", "pypi", "packages/x/1.0/x-1.0.tar.gz", db_session, + ) + + assert first is not None + assert second is None # skipped duplicate + + +@pytest.mark.asyncio +async def test_harvest_clean_package(db_session, guarddog_normalized_clean): + with ( + patch("guarddog_nexus.harvester.download_asset") as mock_dl, + patch("guarddog_nexus.harvester.compute_sha256") as mock_sha, + patch("guarddog_nexus.harvester.scan_package") as mock_scan, + ): + mock_dl.return_value = "/tmp/test.tar.gz" + mock_sha.return_value = "abc" + mock_scan.return_value = guarddog_normalized_clean + + scan = await harvest( + "http://nexus:8081/repo/pypi-proxy/packages/django/4.2/django-4.2.tar.gz", + "pypi-proxy", "pypi", "packages/django/4.2/django-4.2.tar.gz", db_session, + ) + + assert scan is not None + assert scan.flagged is False + assert scan.total_findings == 0 + + +@pytest.mark.asyncio +async def test_harvest_download_failure(db_session): + with patch("guarddog_nexus.harvester.download_asset") as mock_dl: + mock_dl.return_value = None + + scan = await harvest( + "http://nexus:8081/repo/pypi-proxy/packages/fail/1.0/fail-1.0.tar.gz", + "pypi-proxy", "pypi", "packages/fail/1.0/fail-1.0.tar.gz", db_session, + ) + + assert scan is not None + assert scan.status == "failed" + assert "Download failed" in (scan.error_message or "") + + +@pytest.mark.asyncio +async def test_harvest_skips_non_package_asset(db_session): + scan = await harvest( + "http://nexus:8081/repo/pypi-proxy/simple/index.html", + "pypi-proxy", "pypi", "simple/index.html", db_session, + ) + assert scan is None diff --git a/tests/test_scanner.py b/tests/test_scanner.py new file mode 100644 index 0000000..fbca2f3 --- /dev/null +++ b/tests/test_scanner.py @@ -0,0 +1,28 @@ +"""Tests for 
GuardDog scanner integration.""" + +from guarddog_nexus.scanner import _normalize_output + + +def test_normalize_clean_output(guarddog_output_clean): + result = _normalize_output(guarddog_output_clean) + assert len(result["findings"]) == 0 + assert len(result["errors"]) == 0 + + +def test_normalize_flagged_output(guarddog_output_flagged): + result = _normalize_output(guarddog_output_flagged) + assert len(result["findings"]) == 2 + assert result["findings"][0]["rule"] == "shady-links" + assert result["findings"][0]["severity"] == "WARNING" + assert result["findings"][1]["rule"] == "exec-base64" + assert result["findings"][1]["severity"] == "ERROR" + + +def test_normalize_issues_format(): + data = { + "issues": [{"id": "test-rule", "severity": "ERROR", "description": "Bad"}], + "errors": [], + } + result = _normalize_output(data) + assert len(result["findings"]) == 1 + assert result["findings"][0]["rule"] == "test-rule" diff --git a/tests/test_webhooks.py b/tests/test_webhooks.py new file mode 100644 index 0000000..7a08ca1 --- /dev/null +++ b/tests/test_webhooks.py @@ -0,0 +1,72 @@ +"""Tests for Nexus webhook receiver.""" + +from unittest.mock import patch + +import pytest + + +@pytest.mark.asyncio +async def test_webhook_rejects_invalid_json(client): + resp = await client.post( + "/webhooks/nexus", + content="not json", + headers={"Content-Type": "application/json"}, + ) + assert resp.status_code == 400 + + +@pytest.mark.asyncio +async def test_webhook_ignores_deleted_action(client, sample_nexus_webhook): + sample_nexus_webhook["action"] = "DELETED" + resp = await client.post( + "/webhooks/nexus", + json=sample_nexus_webhook, + ) + assert resp.status_code == 200 + assert resp.json()["status"] == "ignored" + + +@pytest.mark.asyncio +async def test_webhook_accepts_created(client, sample_nexus_webhook): + with patch("guarddog_nexus.webhooks._scan_in_background") as _mock_scan: + resp = await client.post( + "/webhooks/nexus", + json=sample_nexus_webhook, + ) + assert 
resp.status_code == 200 + data = resp.json() + assert data["status"] == "accepted" + assert data["package"] == "requests-2.31.0.tar.gz" + assert data["action"] == "CREATED" + + +@pytest.mark.asyncio +async def test_webhook_accepts_updated(client, sample_nexus_webhook): + sample_nexus_webhook["action"] = "UPDATED" + with patch("guarddog_nexus.webhooks._scan_in_background") as _mock_scan: + resp = await client.post( + "/webhooks/nexus", + json=sample_nexus_webhook, + ) + assert resp.status_code == 200 + assert resp.json()["status"] == "accepted" + + +@pytest.mark.asyncio +async def test_webhook_skips_metadata_assets(client, sample_nexus_webhook): + sample_nexus_webhook["asset"]["name"] = "index.html" + resp = await client.post( + "/webhooks/nexus", + json=sample_nexus_webhook, + ) + assert resp.status_code == 200 + assert resp.json()["status"] == "ignored" + + +@pytest.mark.asyncio +async def test_webhook_missing_asset(client): + resp = await client.post( + "/webhooks/nexus", + json={"action": "CREATED", "repositoryName": "test"}, + ) + assert resp.status_code == 400