feat: guarddog-nexus — webhook-based PyPI scanner with web UI

This commit is contained in:
Marker689
2026-05-09 04:48:10 +03:00
parent bdcc82807d
commit 4ce99d3c85
32 changed files with 1865 additions and 0 deletions

22
Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
# Runtime image for the guarddog-nexus service.
FROM python:3.12-slim-bookworm
# curl is needed at runtime: the Nexus client shells out to it to download assets.
RUN apt-get update && apt-get install -y --no-install-recommends curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# Copy the uv binaries from the upstream uv image.
# NOTE(review): ":latest" is a floating tag, so rebuilds are not reproducible — consider pinning.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/
WORKDIR /app
COPY pyproject.toml ./
COPY guarddog_nexus/ guarddog_nexus/
# Install the GuardDog CLI (invoked as a subprocess by the scanner), then this project.
RUN uv pip install --system guarddog
RUN uv pip install --system -e .
# /data holds the database file; /tmp/guarddog-nexus is the download scratch directory.
RUN mkdir -p /data /tmp/guarddog-nexus
ENV DATABASE_PATH=/data/guarddog.db
ENV TEMP_DIR=/tmp/guarddog-nexus
EXPOSE 8080
CMD ["python", "-m", "guarddog_nexus.main"]

44
docker-compose.yml Normal file
View File

@@ -0,0 +1,44 @@
---
# Local stack: the scanner, a Nexus instance, and a one-shot job that
# configures Nexus (including the webhook pointing back at the scanner).
services:
  # Scanner + web UI, built from the Dockerfile in this directory.
  guarddog-nexus:
    build: .
    restart: unless-stopped
    # Start only after the Nexus bootstrap job has finished successfully.
    depends_on:
      nexus-setup:
        condition: service_completed_successfully
    ports:
      - "8080:8080"
    volumes:
      - ./data:/data
    environment:
      HOST: "0.0.0.0"
      PORT: "8080"
      LOG_LEVEL: INFO
      LOG_SYSLOG_HOST: ""
      NEXUS_URL: http://nexus:8081
      NEXUS_REPOSITORIES: pypi-proxy
      NEXUS_USERNAME: admin
      # NOTE(review): falls back to a well-known password when NEXUS_PASSWORD
      # is unset; set it explicitly outside local development.
      NEXUS_PASSWORD: "${NEXUS_PASSWORD:-admin123}"

  # Sonatype Nexus repository manager.
  nexus:
    image: sonatype/nexus3:3.79.0
    restart: unless-stopped
    ports:
      - "8081:8081"
    volumes:
      - nexus-data:/nexus-data

  # One-shot bootstrap: runs scripts/setup-nexus.sh against the Nexus container.
  nexus-setup:
    image: alpine:3.21
    depends_on:
      nexus:
        condition: service_started
    entrypoint:
      - /bin/sh
      - /setup.sh
    volumes:
      - ./scripts/setup-nexus.sh:/setup.sh:ro
      - nexus-data:/nexus-data:ro
    environment:
      NEXUS_URL: http://nexus:8081
      ADMIN_PASSWORD: "${NEXUS_PASSWORD:-admin123}"
      WEBHOOK_URL: http://guarddog-nexus:8080/webhooks/nexus

volumes:
  nexus-data:

View File

View File

View File

@@ -0,0 +1,49 @@
"""REST API for findings (across all scans)."""
from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding
router = APIRouter(prefix="/api/v1/findings", tags=["findings"])
@router.get("")
async def list_findings(
    limit: int = Query(50, ge=0, le=200),
    offset: int = Query(0, ge=0),
    rule: str | None = Query(None),
    severity: str | None = Query(None),
    scan_id: int | None = Query(None),
    session: AsyncSession = Depends(get_session),
):
    """Return one page of findings, optionally filtered.

    Query parameters:
        limit: page size, 0-200. The lower bound matters: a negative LIMIT is
            treated as "no limit" by SQLite (which this project appears to
            use), which would bypass the 200-row cap.
        offset: number of rows to skip (>= 0).
        rule / severity / scan_id: exact-match filters; omitted filters are
            not applied.

    Returns:
        A dict with ``total`` (rows matching the filters, ignoring paging),
        the echoed ``limit`` and ``offset``, and the ``findings`` page.
    """
    q = select(Finding)
    if rule:
        q = q.where(Finding.rule == rule)
    if severity:
        q = q.where(Finding.severity == severity)
    # ``is not None`` so that an explicit scan_id=0 filters instead of being ignored.
    if scan_id is not None:
        q = q.where(Finding.scan_id == scan_id)

    total = await session.scalar(select(func.count()).select_from(q.subquery()))

    # A stable ordering is required for offset/limit paging to be repeatable.
    page_q = q.order_by(Finding.id).offset(offset).limit(limit)
    findings = (await session.execute(page_q)).scalars().all()

    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "findings": [
            {
                "id": f.id,
                "scan_id": f.scan_id,
                "rule": f.rule,
                "severity": f.severity,
                "message": f.message,
                "location": f.location,
                "created_at": f.created_at.isoformat() if f.created_at else None,
            }
            for f in findings
        ],
    }

View File

@@ -0,0 +1,122 @@
"""REST API for packages (distinct packages across scans)."""
from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
router = APIRouter(prefix="/api/v1/packages", tags=["packages"])
@router.get("")
async def list_packages(
    limit: int = Query(50, ge=0, le=200),
    offset: int = Query(0, ge=0),
    ecosystem: str | None = Query(None),
    flagged: bool | None = Query(None),
    session: AsyncSession = Depends(get_session),
):
    """Return one page of distinct (name, version) packages aggregated over scans.

    Query parameters:
        limit: page size, 0-200. Bounded below because a negative LIMIT is
            unbounded on SQLite, which would defeat the 200-row cap.
        offset: number of packages to skip (>= 0).
        ecosystem: keep only scans of this ecosystem before grouping.
        flagged: when given, keep only packages whose "any scan flagged"
            state equals this value.

    Returns:
        A dict with ``total`` (matching packages), the echoed ``limit`` and
        ``offset``, and the ``packages`` page, most recently scanned first.
    """
    last_scanned = func.max(Scan.started_at)
    any_flagged = func.max(Scan.flagged)

    # NOTE(review): ecosystem and repository are selected but not part of the
    # GROUP BY, so for a package scanned in several repositories the value
    # shown is whichever row the database picks.
    grouped = select(
        Scan.package_name,
        Scan.package_version,
        Scan.ecosystem,
        Scan.repository,
        last_scanned.label("last_scanned_at"),
        any_flagged.label("is_flagged"),
        func.sum(Scan.total_findings).label("total_findings"),
        func.max(Scan.id).label("latest_scan_id"),
    ).group_by(Scan.package_name, Scan.package_version)

    if ecosystem:
        grouped = grouped.where(Scan.ecosystem == ecosystem)
    if flagged is not None:
        grouped = grouped.having(any_flagged == flagged)

    total = await session.scalar(select(func.count()).select_from(grouped.subquery()))
    page_q = grouped.order_by(last_scanned.desc()).offset(offset).limit(limit)
    rows = (await session.execute(page_q)).all()

    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "packages": [
            {
                "name": r.package_name,
                "version": r.package_version,
                "ecosystem": r.ecosystem,
                "repository": r.repository,
                "last_scanned_at": r.last_scanned_at.isoformat() if r.last_scanned_at else None,
                "flagged": bool(r.is_flagged),
                "total_findings": r.total_findings,
                "latest_scan_id": r.latest_scan_id,
            }
            for r in rows
        ],
    }
@router.get("/{name}/{version}")
async def get_package(
    name: str,
    version: str,
    session: AsyncSession = Depends(get_session),
):
    """Return every scan and finding recorded for one package version.

    Args:
        name: package name, matched exactly.
        version: package version, matched exactly.

    Returns:
        A dict describing the package (taken from its newest scan), its scans
        (newest first) and the findings of all those scans.

    Raises:
        HTTPException: 404 when no scan exists for this name/version. The
            response body is still ``{"detail": "Not found"}``; previously it
            was sent with status 200.
    """
    # Imported locally so this handler does not depend on the module's import block.
    from fastapi import HTTPException

    scans = (
        await session.execute(
            select(Scan)
            .where(Scan.package_name == name, Scan.package_version == version)
            .order_by(Scan.started_at.desc())
        )
    ).scalars().all()
    if not scans:
        raise HTTPException(status_code=404, detail="Not found")

    # Load the findings of all scans in one query instead of one query per scan.
    finding_rows = (
        await session.execute(
            select(Finding)
            .where(Finding.scan_id.in_([s.id for s in scans]))
            .order_by(Finding.id)
        )
    ).scalars().all()
    findings_by_scan: dict[int, list[Finding]] = {}
    for finding in finding_rows:
        findings_by_scan.setdefault(finding.scan_id, []).append(finding)

    # Keep findings grouped by scan, in the same newest-scan-first order as ``scans``.
    all_findings = [f for s in scans for f in findings_by_scan.get(s.id, [])]

    newest = scans[0]
    return {
        "name": newest.package_name,
        "version": newest.package_version,
        "ecosystem": newest.ecosystem,
        "repository": newest.repository,
        "flagged": any(s.flagged for s in scans),
        "scans": [
            {
                "id": s.id,
                "status": s.status,
                "total_findings": s.total_findings,
                "flagged": s.flagged,
                "started_at": s.started_at.isoformat() if s.started_at else None,
            }
            for s in scans
        ],
        "findings": [
            {
                "id": f.id,
                "rule": f.rule,
                "severity": f.severity,
                "message": f.message,
                "location": f.location,
            }
            for f in all_findings
        ],
    }

120
guarddog_nexus/api/scans.py Normal file
View File

@@ -0,0 +1,120 @@
"""REST API for scans."""
from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
router = APIRouter(prefix="/api/v1/scans", tags=["scans"])
@router.get("")
async def list_scans(
    limit: int = Query(50, ge=0, le=200),
    offset: int = Query(0, ge=0),
    flagged: bool | None = Query(None),
    session: AsyncSession = Depends(get_session),
):
    """Return one page of scans, newest first.

    Query parameters:
        limit: page size, 0-200. Bounded below because a negative LIMIT is
            unbounded on SQLite, which would defeat the 200-row cap.
        offset: number of scans to skip (>= 0).
        flagged: when given, keep only scans whose ``flagged`` equals it.

    Returns:
        A dict with ``total`` (scans matching the filter, ignoring paging),
        the echoed ``limit`` and ``offset``, and the ``scans`` page.
    """
    q = select(Scan)
    count_q = select(func.count(Scan.id))
    if flagged is not None:
        # Apply the filter to the count as well, so ``total`` describes the
        # same result set that is being paged through.
        q = q.where(Scan.flagged == flagged)
        count_q = count_q.where(Scan.flagged == flagged)

    q = q.order_by(Scan.started_at.desc()).offset(offset).limit(limit)
    total = await session.scalar(count_q)
    scans = (await session.execute(q)).scalars().all()

    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "scans": [
            {
                "id": s.id,
                "package_name": s.package_name,
                "package_version": s.package_version,
                "ecosystem": s.ecosystem,
                "repository": s.repository,
                "status": s.status,
                "total_findings": s.total_findings,
                "flagged": s.flagged,
                "started_at": s.started_at.isoformat() if s.started_at else None,
                "finished_at": s.finished_at.isoformat() if s.finished_at else None,
                "error_message": s.error_message,
            }
            for s in scans
        ],
    }
@router.get("/stats")
async def scan_stats(session: AsyncSession = Depends(get_session)):
    """Return aggregate counters over all scans and findings.

    The response carries the total and flagged scan counts, the number of
    flagged scans started within the last seven days, the total number of
    findings, the ten most frequently triggered rules, and the start time of
    the most recent scan (``None`` when there are no scans).
    """
    scan_count = select(func.count(Scan.id))
    # Database-side "now minus seven days" (SQLite datetime() call).
    week_ago = func.datetime("now", "-7 days")

    total_scans = await session.scalar(scan_count)
    flagged_scans = await session.scalar(
        scan_count.where(Scan.flagged == True)  # noqa: E712 - SQL expression
    )
    recent_flagged = await session.scalar(
        scan_count.where(Scan.flagged == True, Scan.started_at >= week_ago)  # noqa: E712
    )
    total_findings = await session.scalar(select(func.count(Finding.id)))

    hits = func.count(Finding.id)
    rule_rows = (
        await session.execute(
            select(Finding.rule, hits.label("cnt"))
            .group_by(Finding.rule)
            .order_by(hits.desc())
            .limit(10)
        )
    ).all()

    newest = await session.scalar(
        select(Scan).order_by(Scan.started_at.desc()).limit(1)
    )
    if newest:
        latest_scan_at = newest.started_at.isoformat()
    else:
        latest_scan_at = None

    return {
        "total_scans": total_scans,
        "flagged_scans": flagged_scans,
        "recent_flagged": recent_flagged,
        "total_findings": total_findings,
        "top_rules": [{"rule": row.rule, "count": row.cnt} for row in rule_rows],
        "latest_scan_at": latest_scan_at,
    }
@router.get("/{scan_id}")
async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)):
    """Return a single scan together with its findings.

    Args:
        scan_id: primary key of the scan.

    Raises:
        HTTPException: 404 when no scan has this id. The response body is
            still ``{"detail": "Not found"}``; previously it was sent with
            status 200.
    """
    # Imported locally so this handler does not depend on the module's import block.
    from fastapi import HTTPException

    scan = await session.scalar(
        # Eager-load findings: lazy loading is not available on an async session.
        select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings))
    )
    if not scan:
        raise HTTPException(status_code=404, detail="Not found")

    return {
        "id": scan.id,
        "package_name": scan.package_name,
        "package_version": scan.package_version,
        "ecosystem": scan.ecosystem,
        "repository": scan.repository,
        "nexus_asset_url": scan.nexus_asset_url,
        "sha256": scan.sha256,
        "status": scan.status,
        "total_findings": scan.total_findings,
        "flagged": scan.flagged,
        "started_at": scan.started_at.isoformat() if scan.started_at else None,
        "finished_at": scan.finished_at.isoformat() if scan.finished_at else None,
        "error_message": scan.error_message,
        "findings": [
            {
                "id": f.id,
                "rule": f.rule,
                "severity": f.severity,
                "message": f.message,
                "location": f.location,
            }
            for f in scan.findings
        ],
    }

129
guarddog_nexus/harvester.py Normal file
View File

@@ -0,0 +1,129 @@
"""Harvester: download a package from Nexus, scan it, store results."""
import datetime
import os
import tempfile
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.config import config
from guarddog_nexus.logging_setup import log
from guarddog_nexus.models import Finding, Scan, ScanStatus
from guarddog_nexus.nexus_client import (
SUPPORTED_EXTENSIONS,
compute_sha256,
download_asset,
extract_pypi_info,
)
from guarddog_nexus.scanner import scan_package
def _fail_scan(scan: Scan, message: str) -> None:
    """Mark ``scan`` as failed with ``message`` (capped at 1000 chars); the caller commits."""
    scan.status = ScanStatus.FAILED.value
    scan.error_message = message[:1000]
    scan.finished_at = datetime.datetime.now(datetime.timezone.utc)


async def harvest(
    download_url: str,
    repository: str,
    format_: str,
    asset_path: str,
    session: AsyncSession,
) -> Scan | None:
    """Download, scan, and store results for a single package asset.

    Args:
        download_url: URL the asset is fetched from.
        repository: name of the Nexus repository the asset belongs to.
        format_: repository format; passed to GuardDog as the ecosystem.
        asset_path: repository-relative path, used to derive name and version.
        session: database session used for all reads and writes.

    Returns:
        The persisted ``Scan`` (status COMPLETED or FAILED), or ``None`` when
        the asset is skipped: unsupported file extension, unparsable path, or
        a scan already recorded for this name/version/repository.
    """
    # Local imports keep this block self-contained.
    import asyncio
    import shutil

    # The format name is used as the ecosystem name unchanged.
    ecosystem = format_

    filename = os.path.basename(download_url.split("?")[0])
    if not filename.endswith(SUPPORTED_EXTENSIONS):
        log.info("Skipping non-package asset: %s", filename)
        return None

    info = extract_pypi_info(asset_path)
    if info is None:
        log.warning("Could not parse package info from path: %s", asset_path)
        return None
    package_name, package_version = info

    existing = await session.scalar(
        select(Scan.id).where(
            Scan.package_name == package_name,
            Scan.package_version == package_version,
            Scan.repository == repository,
        )
    )
    if existing:
        log.info("Already scanned %s==%s, skipping", package_name, package_version)
        return None

    scan = Scan(
        package_name=package_name,
        package_version=package_version,
        ecosystem=ecosystem,
        repository=repository,
        nexus_asset_url=download_url,
        status=ScanStatus.PENDING.value,
    )
    session.add(scan)
    await session.commit()
    await session.refresh(scan)

    os.makedirs(config.temp_dir, exist_ok=True)
    tmpdir = tempfile.mkdtemp(dir=config.temp_dir)
    try:
        scan.status = ScanStatus.SCANNING.value
        await session.commit()

        # download_asset / compute_sha256 / scan_package are blocking (subprocess
        # and file I/O); run them in a worker thread so the event loop keeps
        # serving other requests while they run.
        downloaded = await asyncio.to_thread(download_asset, download_url, tmpdir)
        if not downloaded:
            _fail_scan(scan, "Download failed")
            await session.commit()
            return scan

        scan.sha256 = await asyncio.to_thread(compute_sha256, downloaded)
        await session.commit()

        log.info("Scanning %s==%s", package_name, package_version)
        result = await asyncio.to_thread(scan_package, downloaded, ecosystem)

        # scan_package only returns a "findings" key when GuardDog produced
        # parsable output. Its failure results (timeout, missing binary,
        # non-zero exit, invalid JSON) carry "errors" but no "findings"; those
        # must not be recorded as a clean, completed scan.
        if "findings" not in result:
            reasons = result.get("errors") or ["unknown scanner error"]
            message = "; ".join(str(reason) for reason in reasons)
            log.error("Scan failed for %s==%s: %s", package_name, package_version, message)
            _fail_scan(scan, message)
            await session.commit()
            return scan

        findings_list = result["findings"]
        for fdata in findings_list:
            session.add(
                Finding(
                    scan_id=scan.id,
                    rule=fdata["rule"],
                    severity=fdata["severity"],
                    message=fdata["message"],
                    location=fdata.get("location"),
                )
            )

        scan.total_findings = len(findings_list)
        scan.flagged = len(findings_list) > 0
        scan.status = ScanStatus.COMPLETED.value
        scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
        await session.commit()

        if scan.flagged:
            log.warning(
                "FLAGGED %s==%s: %d findings in repo %s",
                package_name,
                package_version,
                scan.total_findings,
                repository,
            )
        log.info(
            "Scan complete: %s==%s (%d findings)",
            package_name,
            package_version,
            scan.total_findings,
        )
        return scan
    except Exception as e:
        log.error("Scan failed for %s==%s: %s", package_name, package_version, e)
        _fail_scan(scan, str(e))
        await session.commit()
        return scan
    finally:
        # Always remove the per-scan download directory; previously it was
        # left behind after every scan.
        shutil.rmtree(tmpdir, ignore_errors=True)

View File

@@ -0,0 +1,43 @@
"""Structured logging with syslog support."""
import json
import logging
import sys
from logging.handlers import SysLogHandler
from guarddog_nexus.config import config
class JsonFormatter(logging.Formatter):
    """Log formatter that renders each record as a single JSON object."""

    def format(self, record: logging.LogRecord) -> str:
        """Serialise ``record`` to JSON.

        The object always has ``timestamp``, ``level``, ``logger`` and
        ``message``; ``exception`` (the exception's string form) is added
        when the record carries exception info.
        """
        payload = dict(
            timestamp=self.formatTime(record, self.datefmt),
            level=record.levelname,
            logger=record.name,
            message=record.getMessage(),
        )
        raised = record.exc_info[1] if record.exc_info else None
        if raised:
            payload["exception"] = str(raised)
        return json.dumps(payload, ensure_ascii=False)
def setup_logging() -> logging.Logger:
    """Configure and return the ``guarddog_nexus`` logger.

    The level comes from ``config.log_level``. JSON-formatted records always
    go to stdout; when ``config.log_syslog_host`` is set they are also sent
    to that syslog host/port using the LOCAL0 facility.
    """
    logger = logging.getLogger("guarddog_nexus")
    logger.setLevel(config.log_level.upper())

    def attach(handler: logging.Handler) -> None:
        # Every handler gets its own JSON formatter instance.
        handler.setFormatter(JsonFormatter())
        logger.addHandler(handler)

    attach(logging.StreamHandler(sys.stdout))

    if config.log_syslog_host:
        syslog_target = (config.log_syslog_host, config.log_syslog_port)
        attach(SysLogHandler(address=syslog_target, facility=SysLogHandler.LOG_LOCAL0))

    return logger
log = setup_logging()

61
guarddog_nexus/main.py Normal file
View File

@@ -0,0 +1,61 @@
"""GuardDog Nexus — FastAPI application entry point."""
import os
from contextlib import asynccontextmanager
import uvicorn
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from guarddog_nexus.api import findings, packages, scans
from guarddog_nexus.config import config
from guarddog_nexus.database import init_db
from guarddog_nexus.logging_setup import log
from guarddog_nexus.web.routes import router as web_router
from guarddog_nexus.webhooks import router as webhook_router
STATIC_DIR = os.path.join(os.path.dirname(__file__), "static")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: initialise the database, then log start and stop."""
    # Runs before the application starts serving.
    await init_db()
    log.info("GuardDog Nexus started on %s:%s", config.host, config.port)
    yield
    # Runs when the application is shutting down.
    log.info("GuardDog Nexus shutting down")
# The ASGI application served by uvicorn.
app = FastAPI(
    lifespan=lifespan,
    title="GuardDog Nexus",
    version="0.1.0",
    description="Scan PyPI packages from Sonatype Nexus webhooks using GuardDog",
)

# Routers are registered in this fixed order: webhooks, JSON API, then web UI.
for _router in (
    webhook_router,
    scans.router,
    packages.router,
    findings.router,
    web_router,
):
    app.include_router(_router)

# Static assets are optional; mount them only when the directory exists.
if os.path.isdir(STATIC_DIR):
    _static_files = StaticFiles(directory=STATIC_DIR)
    app.mount("/static", _static_files, name="static")
@app.get("/health")
async def health():
    """Liveness probe: returns a fixed status and version, without touching the database."""
    return {"status": "ok", "version": "0.1.0"}
def main():
    """Run the application under uvicorn using host, port and log level from config."""
    server_options = {
        "host": config.host,
        "port": config.port,
        "log_level": config.log_level.lower(),
        "reload": False,
    }
    uvicorn.run("guarddog_nexus.main:app", **server_options)


if __name__ == "__main__":
    main()

58
guarddog_nexus/models.py Normal file
View File

@@ -0,0 +1,58 @@
"""SQLAlchemy ORM models."""
import datetime
from enum import Enum
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, func
from sqlalchemy.orm import Mapped, mapped_column, relationship
from guarddog_nexus.database import Base
class ScanStatus(str, Enum):
    """Scan lifecycle states; the string values are what is stored in ``Scan.status``."""

    PENDING = "pending"
    SCANNING = "scanning"
    COMPLETED = "completed"
    FAILED = "failed"
class Scan(Base):
    """One scan of a package version taken from a Nexus repository."""

    __tablename__ = "scans"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    package_name: Mapped[str] = mapped_column(String(255), nullable=False)
    package_version: Mapped[str] = mapped_column(String(255), nullable=False)
    ecosystem: Mapped[str] = mapped_column(String(50), nullable=False, default="pypi")
    repository: Mapped[str] = mapped_column(String(255), nullable=False)
    nexus_asset_url: Mapped[str] = mapped_column(Text, nullable=False)
    # Hex SHA-256 digest of the downloaded file; NULL until it has been downloaded.
    sha256: Mapped[str | None] = mapped_column(String(64), nullable=True)
    # One of the ScanStatus values, stored as a plain string.
    status: Mapped[str] = mapped_column(
        String(20), nullable=False, default=ScanStatus.PENDING.value
    )
    total_findings: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    flagged: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    # Defaults to the database's current time at insert.
    started_at: Mapped[datetime.datetime] = mapped_column(
        DateTime, nullable=False, default=func.now()
    )
    finished_at: Mapped[datetime.datetime | None] = mapped_column(DateTime, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Findings are owned by the scan: deleting a scan deletes its findings.
    findings: Mapped[list["Finding"]] = relationship(
        "Finding", back_populates="scan", cascade="all, delete-orphan"
    )
class Finding(Base):
    """A single rule hit reported for a scan."""

    __tablename__ = "findings"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Owning scan (foreign key to scans.id).
    scan_id: Mapped[int] = mapped_column(Integer, ForeignKey("scans.id"), nullable=False)
    rule: Mapped[str] = mapped_column(String(255), nullable=False)
    severity: Mapped[str] = mapped_column(String(50), nullable=False)
    message: Mapped[str] = mapped_column(Text, nullable=False)
    location: Mapped[str | None] = mapped_column(String(512), nullable=True)
    # Defaults to the database's current time at insert.
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime, nullable=False, default=func.now()
    )
    scan: Mapped["Scan"] = relationship("Scan", back_populates="findings")

View File

@@ -0,0 +1,65 @@
"""Sonatype Nexus REST API client."""
import hashlib
import os
import subprocess
from guarddog_nexus.config import config
from guarddog_nexus.logging_setup import log
SUPPORTED_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip")
PACKAGE_FILE_PATTERNS = ("packages/",)
def get_ecosystem_from_format(fmt: str) -> str | None:
mapping = {
"pypi": "pypi",
"npm": "npm",
"rubygems": "rubygems",
"go": "go",
"raw": None,
}
return mapping.get(fmt.lower() if fmt else "")
def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
"""Extract package name and version from a PyPI asset path.
Path format: packages/requests/2.31.0/requests-2.31.0.tar.gz
"""
parts = asset_path.strip("/").split("/")
if len(parts) >= 3 and parts[0] == "packages":
return parts[1], parts[2]
return None
def download_asset(download_url: str, dest_dir: str) -> str | None:
    """Download an asset from Nexus using curl (available in Docker).

    Args:
        download_url: URL to fetch; a query string is ignored when deriving
            the local file name.
        dest_dir: existing directory the file is written into.

    Returns:
        Path of the downloaded file, or ``None`` when curl exits non-zero,
        times out (120 s) or cannot be started.
    """
    dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
    # NOTE(review): the credentials are passed in argv and are therefore visible
    # in the process list while curl runs; consider a curl config file/stdin.
    cmd = [
        "curl", "-sfSL",
        # Only ever speak HTTP(S), including across redirects, so a crafted
        # URL cannot make curl read local files (file://) or use other schemes.
        "--proto", "=http,https",
        "--proto-redir", "=http,https",
        "-u", f"{config.nexus_username}:{config.nexus_password}",
        "-o", dest_path,
        # Pass the URL as an option value so a value starting with "-" is
        # never interpreted as a curl option.
        "--url", download_url,
    ]
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
        if result.returncode != 0:
            log.warning("Failed to download %s: %s", download_url, result.stderr)
            return None
        return dest_path
    except Exception as e:
        # Covers subprocess.TimeoutExpired and a missing curl binary.
        log.error("Download error for %s: %s", download_url, e)
        return None
def compute_sha256(filepath: str) -> str:
    """Return the hex SHA-256 digest of the file at ``filepath``, read in 8 KiB blocks."""
    digest = hashlib.sha256()
    with open(filepath, "rb") as stream:
        while block := stream.read(8192):
            digest.update(block)
    return digest.hexdigest()

74
guarddog_nexus/scanner.py Normal file
View File

@@ -0,0 +1,74 @@
"""GuardDog CLI integration via subprocess."""
import json
import shutil
import subprocess
from guarddog_nexus.config import config
from guarddog_nexus.logging_setup import log
GUARDDOG_BIN = shutil.which("guarddog") or "guarddog"
def scan_package(filepath: str, ecosystem: str = "pypi") -> dict:
    """Run the GuardDog CLI against a downloaded package file.

    Args:
        filepath: path of the package archive to scan.
        ecosystem: GuardDog sub-command to use (defaults to ``pypi``).

    Returns:
        On success, the normalised result from ``_normalize_output``. On
        failure (timeout, missing binary, exit status other than 0 or 1, or
        unparsable output) a dict with an empty ``issues`` list and one entry
        in ``errors`` naming the reason.
    """

    def failure(reason: str) -> dict:
        return {"issues": [], "errors": [reason]}

    cmd = [GUARDDOG_BIN, ecosystem, "scan", filepath, "--output-format", "json"]
    log.info("Running: %s", " ".join(cmd))

    try:
        completed = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=config.scan_timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        log.error("GuardDog scan timed out for %s", filepath)
        return failure("timeout")
    except FileNotFoundError:
        log.error("GuardDog binary not found at %s", GUARDDOG_BIN)
        return failure("guarddog_not_found")

    # Exit statuses 0 and 1 are both treated as a usable run.
    if completed.returncode not in (0, 1):
        log.error("GuardDog exited %d: %s", completed.returncode, completed.stderr)
        return failure(completed.stderr.strip())

    try:
        parsed = json.loads(completed.stdout)
    except json.JSONDecodeError:
        log.error("GuardDog returned invalid JSON for %s", filepath)
        return failure("json_parse_error")

    return _normalize_output(parsed)
def _normalize_output(data: dict) -> dict:
"""Normalize guarddog JSON output across versions into a consistent format.
GuardDog JSON format (varies by version):
{
"results": [{"rule": "...", "severity": "...", "message": "...", "location": "..."}],
"errors": [...]
}
Or simpler:
{"issues": [...], "errors": [...]}
"""
findings = []
for entry in data.get("results", data.get("issues", [])):
if isinstance(entry, dict):
findings.append({
"rule": entry.get("rule", entry.get("id", "unknown")),
"severity": entry.get("severity", "WARNING"),
"message": entry.get("message", entry.get("description", "")),
"location": entry.get("location", entry.get("path", "")),
})
return {
"findings": findings,
"errors": data.get("errors", []),
}

View File

@@ -0,0 +1 @@
/* static/style.css - minimal overrides for Pico.css dark theme */

View File

View File

@@ -0,0 +1,191 @@
"""Web UI routes — Jinja2 + htmx pages."""
import datetime
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
router = APIRouter(tags=["web"])
TEMPLATES: dict[str, str] = {}
# Shared Jinja2 environment, created on first use by _get_env().
_jinja_env = None


def _get_env():
    """Return the process-wide Jinja2 environment, building it on first call."""
    global _jinja_env
    if _jinja_env is None:
        from jinja2 import Environment, PackageLoader, select_autoescape

        _jinja_env = Environment(
            loader=PackageLoader("guarddog_nexus", "web/templates"),
            autoescape=select_autoescape(),
        )
    return _jinja_env


def _render(name: str, **context) -> HTMLResponse:
    """Render the template ``name`` with ``context`` and wrap it in an HTML response.

    The environment is reused across calls so compiled templates are cached,
    instead of building a new environment (and recompiling the template) on
    every request.
    """
    template = _get_env().get_template(name)
    return HTMLResponse(template.render(**context))
@router.get("/", response_class=HTMLResponse)
async def dashboard(request: Request, session: AsyncSession = Depends(get_session)):
    """Render the dashboard: headline counters, the ten latest scans and the top ten rules."""
    scan_count = select(func.count(Scan.id))
    # Database-side "now minus seven days" (SQLite datetime() call).
    week_ago = func.datetime("now", "-7 days")

    total_scans = await session.scalar(scan_count)
    flagged_scans = await session.scalar(
        scan_count.where(Scan.flagged == True)  # noqa: E712 - SQL expression
    )
    recent_flagged = await session.scalar(
        scan_count.where(Scan.flagged == True, Scan.started_at >= week_ago)  # noqa: E712
    )
    total_findings = await session.scalar(select(func.count(Finding.id)))

    newest_first = select(Scan).order_by(Scan.started_at.desc()).limit(10)
    latest_scans = (await session.execute(newest_first)).scalars().all()

    hits = func.count(Finding.id)
    rule_rows = (
        await session.execute(
            select(Finding.rule, hits.label("cnt"))
            .group_by(Finding.rule)
            .order_by(hits.desc())
            .limit(10)
        )
    ).all()

    context = {
        "total_scans": total_scans,
        "flagged_scans": flagged_scans,
        "recent_flagged": recent_flagged,
        "total_findings": total_findings,
        "latest_scans": latest_scans,
        "top_rules": [(row.rule, row.cnt) for row in rule_rows],
        "now": datetime.datetime.now(datetime.timezone.utc),
        "request": request,
    }
    return _render("dashboard.html", **context)
@router.get("/scans", response_class=HTMLResponse)
async def scans_list(
    request: Request,
    page: int = 1,
    flagged: str = "",
    session: AsyncSession = Depends(get_session),
):
    """Render one page (50 rows) of scans, newest first.

    Args:
        page: 1-based page number; values below 1 are treated as 1.
        flagged: ``"1"`` restricts the list to flagged scans; any other value
            shows all scans.
    """
    per_page = 50
    # Clamp so that page=0 or a negative page cannot yield a negative OFFSET.
    page = max(page, 1)
    offset = (page - 1) * per_page

    q = select(Scan)
    count_q = select(func.count(Scan.id))
    if flagged == "1":
        # Filter the count too, so ``total`` matches the rows being paged
        # (presumably the template derives its page count from it).
        q = q.where(Scan.flagged == True)  # noqa: E712 - SQL expression
        count_q = count_q.where(Scan.flagged == True)  # noqa: E712

    q = q.order_by(Scan.started_at.desc()).offset(offset).limit(per_page)
    scans = (await session.execute(q)).scalars().all()
    total = await session.scalar(count_q)

    return _render(
        "scans_list.html",
        scans=scans,
        page=page,
        per_page=per_page,
        total=total,
        flagged_filter=flagged,
        request=request,
    )
@router.get("/scans/{scan_id}", response_class=HTMLResponse)
async def scan_detail(scan_id: int, request: Request, session: AsyncSession = Depends(get_session)):
    """Render the detail page for one scan, or a 404 page when it does not exist."""
    from sqlalchemy.orm import selectinload

    # Findings are loaded eagerly because the template iterates over them.
    lookup = select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings))
    scan = await session.scalar(lookup)
    if scan:
        return _render("scan_detail.html", scan=scan, request=request)
    return HTMLResponse("<h1>Not found</h1>", status_code=404)
@router.get("/packages", response_class=HTMLResponse)
async def packages_list(
    request: Request,
    page: int = 1,
    flagged: str = "",
    session: AsyncSession = Depends(get_session),
):
    """Render one page (50 rows) of distinct package versions, most recently scanned first.

    Args:
        page: 1-based page number; values below 1 are treated as 1.
        flagged: ``"1"`` restricts the list to packages with at least one
            flagged scan; any other value shows all packages.
    """
    per_page = 50
    # Clamp so that page=0 or a negative page cannot yield a negative OFFSET
    # or a "Page 0 of N" pager.
    page = max(page, 1)
    offset = (page - 1) * per_page

    # One row per (name, version), aggregated over all of its scans.
    subq = select(
        Scan.package_name.label("pkg_name"),
        Scan.package_version.label("pkg_ver"),
        Scan.ecosystem,
        Scan.repository,
        func.max(Scan.started_at).label("last_scan"),
        func.max(Scan.flagged).label("is_flagged"),
        func.sum(Scan.total_findings).label("findings_sum"),
        func.max(Scan.id).label("sid"),
    ).group_by(Scan.package_name, Scan.package_version)
    if flagged == "1":
        subq = subq.having(func.max(Scan.flagged) == True)  # noqa: E712 - SQL expression
    subq = subq.subquery()

    total = await session.scalar(select(func.count()).select_from(subq))
    rows = (
        await session.execute(
            select(subq)
            .order_by(subq.c.last_scan.desc())
            .offset(offset)
            .limit(per_page)
        )
    ).all()

    return _render(
        "packages_list.html",
        packages=rows,
        page=page,
        per_page=per_page,
        total=total,
        flagged_filter=flagged,
        request=request,
    )
@router.get("/packages/{name}/{version}", response_class=HTMLResponse)
async def package_detail(
    name: str,
    version: str,
    request: Request,
    session: AsyncSession = Depends(get_session),
):
    """Render all scans (newest first) and findings of one package version, or a 404 page."""
    from sqlalchemy.orm import selectinload

    query = (
        select(Scan)
        .where(Scan.package_name == name, Scan.package_version == version)
        .options(selectinload(Scan.findings))
        .order_by(Scan.started_at.desc())
    )
    scans = (await session.execute(query)).scalars().all()
    if not scans:
        return HTMLResponse("<h1>Not found</h1>", status_code=404)

    # Flatten the findings of every scan, keeping the scans' order.
    all_findings = [finding for scan in scans for finding in scan.findings]

    return _render(
        "package_detail.html",
        pkg_name=name,
        pkg_version=version,
        scans=scans,
        findings=all_findings,
        request=request,
    )

View File

@@ -0,0 +1,41 @@
<!DOCTYPE html>
{# Base layout: page chrome, shared styles and navigation. Child templates fill the "content" block. #}
<html lang="en" data-theme="dark">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>GuardDog Nexus</title>
  {# NOTE(review): both assets are loaded from public CDNs without integrity attributes. #}
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css">
  <script src="https://unpkg.com/htmx.org@2.0.4"></script>
  <style>
    /* Flag state of a scan or package. */
    .flagged { color: var(--pico-color-red-400); font-weight: bold; }
    .clean { color: var(--pico-color-green-400); }
    /* One class per scan status value (status-<value>). */
    .status-pending { color: var(--pico-color-yellow-400); }
    .status-scanning { color: var(--pico-color-blue-400); }
    .status-completed { color: var(--pico-color-green-400); }
    .status-failed { color: var(--pico-color-red-400); }
    /* Finding severity labels and cards; class names are the severity values. */
    .severity-WARNING { color: var(--pico-color-yellow-400); }
    .severity-ERROR { color: var(--pico-color-red-400); }
    .finding-card { margin-bottom: 0.5rem; padding: 0.5rem; border-left: 3px solid; }
    .finding-card.WARNING { border-left-color: var(--pico-color-yellow-400); }
    .finding-card.ERROR { border-left-color: var(--pico-color-red-400); }
    .finding-card.INFO { border-left-color: var(--pico-color-blue-400); }
    table { font-size: 0.9rem; }
    nav { margin-bottom: 1rem; }
    /* Dashboard counters. */
    .stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
    .stat-card { text-align: center; padding: 1rem; }
  </style>
</head>
<body>
  <main class="container">
    <nav>
      <ul><li><strong><a href="/">GuardDog Nexus</a></strong></li></ul>
      <ul>
        <li><a href="/">Dashboard</a></li>
        <li><a href="/scans">Scans</a></li>
        <li><a href="/packages">Packages</a></li>
      </ul>
    </nav>
    {% block content %}{% endblock %}
  </main>
</body>
</html>

View File

@@ -0,0 +1,8 @@
{% extends "base.html" %}
{% block content %}
<h1>Dashboard</h1>
{#
  Refresh the stats every 30 seconds by re-requesting this page and swapping
  in only its #dashboard-stats element. Polling /api/v1/scans/stats directly
  would insert that endpoint's raw JSON into the page.
#}
<div id="dashboard-stats"
     hx-get="/"
     hx-trigger="every 30s"
     hx-select="#dashboard-stats"
     hx-swap="outerHTML">
  {% include "dashboard_stats.html" %}
</div>
{% endblock %}

View File

@@ -0,0 +1,56 @@
{# Dashboard partial: headline counters, the latest scans table and the most-triggered rules. #}
<div class="stats-grid">
  <article class="stat-card">
    <h5>{{ total_scans }}</h5>
    <small>Total Scans</small>
  </article>
  <article class="stat-card">
    <h5 class="flagged">{{ flagged_scans }}</h5>
    <small>Flagged</small>
  </article>
  <article class="stat-card">
    <h5 class="flagged">{{ recent_flagged }}</h5>
    <small>Flagged (7 days)</small>
  </article>
  <article class="stat-card">
    <h5>{{ total_findings }}</h5>
    <small>Total Findings</small>
  </article>
</div>
<h2>Latest Scans</h2>
<table>
  <thead>
    <tr>
      <th>Package</th>
      <th>Version</th>
      <th>Ecosystem</th>
      <th>Status</th>
      <th>Findings</th>
      <th>Time</th>
    </tr>
  </thead>
  <tbody>
    {% for s in latest_scans %}
    <tr>
      <td><a href="/packages/{{ s.package_name }}/{{ s.package_version }}">{{ s.package_name }}</a></td>
      <td>{{ s.package_version }}</td>
      <td>{{ s.ecosystem }}</td>
      <td><span class="status-{{ s.status }}">{{ s.status }}</span></td>
      <td>{% if s.flagged %}<span class="flagged">{{ s.total_findings }}</span>{% else %}<span class="clean">{{ s.total_findings }}</span>{% endif %}</td>
      <td>{{ s.started_at.strftime('%Y-%m-%d %H:%M') if s.started_at }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
{# top_rules is a list of (rule, count) pairs; the section is omitted when it is empty. #}
{% if top_rules %}
<h2>Top Rules Triggered</h2>
<table>
  <thead><tr><th>Rule</th><th>Count</th></tr></thead>
  <tbody>
    {% for rule, cnt in top_rules %}
    <tr><td><code>{{ rule }}</code></td><td>{{ cnt }}</td></tr>
    {% endfor %}
  </tbody>
</table>
{% endif %}

View File

@@ -0,0 +1,36 @@
{% extends "base.html" %}
{# Package page: every scan of one package version, followed by all of their findings. #}
{% block content %}
<h1>{{ pkg_name }} <small>v{{ pkg_version }}</small></h1>
<h2>Scans ({{ scans|length }})</h2>
<table>
  <thead>
    <tr><th>ID</th><th>Repo</th><th>Status</th><th>Findings</th><th>Time</th></tr>
  </thead>
  <tbody>
    {% for s in scans %}
    <tr>
      <td><a href="/scans/{{ s.id }}">#{{ s.id }}</a></td>
      <td>{{ s.repository }}</td>
      <td><span class="status-{{ s.status }}">{{ s.status }}</span></td>
      <td>{% if s.flagged %}<span class="flagged">{{ s.total_findings }}</span>{% else %}<span class="clean">0</span>{% endif %}</td>
      <td>{{ s.started_at.strftime('%Y-%m-%d %H:%M') if s.started_at }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
<h2>Findings ({{ findings|length }})</h2>
{% if findings %}
{#
  Show the most severe findings first. Sorting the severity names
  alphabetically in reverse would put WARNING before ERROR, so the order is
  spelled out; findings with any other severity value come last.
#}
{% set known_severities = ['ERROR', 'WARNING', 'INFO'] %}
{% set ordered_findings =
     (findings|selectattr('severity', 'equalto', 'ERROR')|list)
     + (findings|selectattr('severity', 'equalto', 'WARNING')|list)
     + (findings|selectattr('severity', 'equalto', 'INFO')|list)
     + (findings|rejectattr('severity', 'in', known_severities)|list) %}
{% for f in ordered_findings %}
<article class="finding-card {{ f.severity }}">
  <strong class="severity-{{ f.severity }}">[{{ f.severity }}]</strong>
  <strong>{{ f.rule }}</strong>
  {% if f.location %}<small> @ {{ f.location }}</small>{% endif %}
  <p>{{ f.message }}</p>
</article>
{% endfor %}
{% else %}
<p class="clean">No findings — package looks clean.</p>
{% endif %}
{% endblock %}

View File

@@ -0,0 +1,48 @@
{% extends "base.html" %}
{# Packages page: one row per package version, with a flagged-only toggle and pager. #}
{% block content %}
<h1>Packages</h1>
<p>
  {# Toggle link: switches between the flagged-only view (flagged=1) and all packages. #}
  <a href="?flagged={% if flagged_filter == '1' %}0{% else %}1{% endif %}" role="button" class="outline">
    {% if flagged_filter == '1' %}Show all{% else %}Flagged only{% endif %}
  </a>
</p>
<table>
  <thead>
    <tr>
      <th>Name</th>
      <th>Version</th>
      <th>Ecosystem</th>
      <th>Repo</th>
      <th>Flagged</th>
      <th>Findings</th>
      <th>Last Scan</th>
    </tr>
  </thead>
  <tbody>
    {% for p in packages %}
    <tr>
      <td><a href="/packages/{{ p.pkg_name }}/{{ p.pkg_ver }}">{{ p.pkg_name }}</a></td>
      <td>{{ p.pkg_ver }}</td>
      <td>{{ p.ecosystem }}</td>
      <td>{{ p.repository }}</td>
      <td>{% if p.is_flagged %}<span class="flagged">YES</span>{% else %}<span class="clean">No</span>{% endif %}</td>
      <td>{{ p.findings_sum }}</td>
      <td>{{ p.last_scan.strftime('%Y-%m-%d %H:%M') if p.last_scan }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
{# Ceiling division: number of pages needed for `total` rows at `per_page` rows each. #}
{% set total_pages = (total // per_page) + (1 if total % per_page else 0) %}
{% if total_pages > 1 %}
<nav>
  <ul>
    <li>{% if page > 1 %}<a href="?page={{ page - 1 }}&flagged={{ flagged_filter }}">Prev</a>{% else %}<span>Prev</span>{% endif %}</li>
    <li><small>Page {{ page }} of {{ total_pages }}</small></li>
    <li>{% if page < total_pages %}<a href="?page={{ page + 1 }}&flagged={{ flagged_filter }}">Next</a>{% else %}<span>Next</span>{% endif %}</li>
  </ul>
</nav>
{% endif %}
{% endblock %}

View File

@@ -0,0 +1,30 @@
{% extends "base.html" %}
{# Scan detail page: a metadata table for one scan followed by one card per finding. #}
{% block content %}
<h1>Scan #{{ scan.id }}</h1>
<table>
<tr><td><strong>Package</strong></td><td><a href="/packages/{{ scan.package_name }}/{{ scan.package_version }}">{{ scan.package_name }}</a></td></tr>
<tr><td><strong>Version</strong></td><td>{{ scan.package_version }}</td></tr>
<tr><td><strong>Ecosystem</strong></td><td>{{ scan.ecosystem }}</td></tr>
<tr><td><strong>Repository</strong></td><td>{{ scan.repository }}</td></tr>
<tr><td><strong>Status</strong></td><td><span class="status-{{ scan.status }}">{{ scan.status }}</span></td></tr>
<tr><td><strong>SHA256</strong></td><td><code>{{ scan.sha256 or '-' }}</code></td></tr>
{# Timestamps are optional; an unset value renders as an empty cell. #}
<tr><td><strong>Started</strong></td><td>{{ scan.started_at.isoformat() if scan.started_at }}</td></tr>
<tr><td><strong>Finished</strong></td><td>{{ scan.finished_at.isoformat() if scan.finished_at }}</td></tr>
{# The error row is only rendered when the scan recorded an error message. #}
{% if scan.error_message %}<tr><td><strong>Error</strong></td><td><span class="flagged">{{ scan.error_message }}</span></td></tr>{% endif %}
</table>
<h2>Findings ({{ scan.findings|length }})</h2>
{% if scan.findings %}
{# NOTE(review): severity is sorted as text, so with reverse=true a "WARNING"
   finding is listed before an "ERROR" one. Confirm whether most-severe-first
   ordering was intended. #}
{% for f in scan.findings|sort(attribute='severity', reverse=true) %}
{# The severity value doubles as a CSS class name on the card and its label. #}
<article class="finding-card {{ f.severity }}">
<strong class="severity-{{ f.severity }}">[{{ f.severity }}]</strong>
<strong>{{ f.rule }}</strong>
{% if f.location %}<small> @ {{ f.location }}</small>{% endif %}
<p>{{ f.message }}</p>
</article>
{% endfor %}
{% else %}
<p class="clean">No findings — package looks clean.</p>
{% endif %}
{% endblock %}

View File

@@ -0,0 +1,48 @@
{% extends "base.html" %}
{# Scan list page: one table row per scan, a "flagged only" toggle and
   Prev/Next pagination.
   Context used: scans, flagged_filter, page, per_page, total. #}
{% block content %}
<h1>Scans</h1>
<p>
  <a href="?flagged={% if flagged_filter == '1' %}0{% else %}1{% endif %}" role="button" class="outline">
    {% if flagged_filter == '1' %}Show all{% else %}Flagged only{% endif %}
  </a>
</p>
<table>
  <thead>
    <tr>
      <th>ID</th>
      <th>Package</th>
      <th>Version</th>
      <th>Repo</th>
      <th>Status</th>
      <th>Findings</th>
      <th>Time</th>
    </tr>
  </thead>
  <tbody>
    {% for s in scans %}
    <tr>
      <td><a href="/scans/{{ s.id }}">#{{ s.id }}</a></td>
      <td>{{ s.package_name }}</td>
      <td>{{ s.package_version }}</td>
      <td>{{ s.repository }}</td>
      <td><span class="status-{{ s.status }}">{{ s.status }}</span></td>
      <td>{% if s.flagged %}<span class="flagged">{{ s.total_findings }}</span>{% else %}<span class="clean">0</span>{% endif %}</td>
      <td>{{ s.started_at.strftime('%Y-%m-%d %H:%M') if s.started_at }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
{# Ceiling division: a partially filled last page still counts as a page. #}
{% set total_pages = (total // per_page) + (1 if total % per_page else 0) %}
{# Carry the active filter across pages only when one is set, so that an
   unset filter is not written into the link as the literal text "None". #}
{% set filter_qs = ('&flagged=' ~ (flagged_filter|urlencode)) if flagged_filter else '' %}
{% if total_pages > 1 %}
<nav>
  <ul>
    <li>{% if page > 1 %}<a href="?page={{ page - 1 }}{{ filter_qs }}">Prev</a>{% else %}<span>Prev</span>{% endif %}</li>
    <li><small>Page {{ page }} of {{ total_pages }}</small></li>
    <li>{% if page < total_pages %}<a href="?page={{ page + 1 }}{{ filter_qs }}">Next</a>{% else %}<span>Next</span>{% endif %}</li>
  </ul>
</nav>
{% endif %}
{% endblock %}

125
guarddog_nexus/webhooks.py Normal file
View File

@@ -0,0 +1,125 @@
"""Nexus webhook receiver — handles component/asset webhooks."""
import hashlib
import hmac
import json
import re
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
from guarddog_nexus.config import config
from guarddog_nexus.database import get_session
from guarddog_nexus.harvester import harvest
from guarddog_nexus.logging_setup import log
router = APIRouter(prefix="/webhooks", tags=["webhooks"])
RELEVANT_ACTIONS = {"CREATED", "UPDATED"}
EXCLUDE_NAME_PATTERNS = [
re.compile(p) for p in [
r"^simple/",
r"\.html$",
r"\.json$",
r"\.xml$",
r"index\.",
r"\.rss$",
r"\.atom$",
]
]
def _should_skip_asset(filename: str) -> bool:
for pat in EXCLUDE_NAME_PATTERNS:
if pat.search(filename):
return True
return False
def _signature_matches(secret: str, payload: bytes, signature: str) -> bool:
    """Check ``signature`` against the HMAC hex digest of ``payload``.

    Nexus Repository signs webhook bodies with HMAC-SHA1. HMAC-SHA256 is
    accepted as well so that senders relying on the previous check keep working.
    """
    key = secret.encode()
    # Compare bytes: ``hmac.compare_digest`` raises TypeError for ``str``
    # operands that contain non-ASCII characters, which a header value may.
    received = signature.encode("utf-8")
    matched = False
    for digest in (hashlib.sha1, hashlib.sha256):
        expected = hmac.new(key, payload, digest).hexdigest().encode("ascii")
        matched |= hmac.compare_digest(received, expected)
    return matched


@router.post("/nexus")
async def nexus_webhook(
    request: Request,
    background_tasks: BackgroundTasks,
    x_nexus_webhook_signature: str | None = Header(None, alias="X-Nexus-Webhook-Signature"),
):
    """Receive a Nexus webhook and queue a background scan for package assets.

    Returns:
        ``{"status": "ignored", ...}`` for actions outside ``RELEVANT_ACTIONS``
        and for metadata assets, otherwise ``{"status": "accepted", ...}``
        after the scan has been queued.

    Raises:
        HTTPException: 401 when a secret is configured and the signature header
            is missing, 403 when the signature does not match, 400 when the
            body is not a JSON object or lacks an asset or a download URL.
    """
    payload = await request.body()

    # Signature verification is only enforced when a secret is configured.
    if config.webhook_secret:
        if not x_nexus_webhook_signature:
            log.warning("Webhook rejected: missing signature header")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature"
            )
        if not _signature_matches(config.webhook_secret, payload, x_nexus_webhook_signature):
            log.warning("Webhook rejected: invalid signature")
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature")

    # Decode inside the try block so that a body that is not valid UTF-8 is
    # answered with 400 instead of surfacing as an unhandled error.
    try:
        data = json.loads(payload.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        log.warning("Webhook received invalid JSON")
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON")

    # Valid JSON may still be a list or a scalar; everything below needs a mapping.
    if not isinstance(data, dict):
        log.warning("Webhook payload is not a JSON object")
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid payload")

    # ``action`` may be missing, null or a non-string; normalise before upper().
    action = str(data.get("action") or "").upper()
    if action not in RELEVANT_ACTIONS:
        log.debug("Ignoring action: %s", action)
        return {"status": "ignored", "action": action}

    asset = data.get("asset") or data.get("component") or data.get("repositoryComponent")
    if not asset or not isinstance(asset, dict):
        log.warning("Webhook payload has no asset/component")
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No asset in payload")

    # A null or non-string name would otherwise fail inside the regex search.
    asset_name = str(asset.get("name") or "")
    if _should_skip_asset(asset_name):
        log.debug("Skipping metadata asset: %s", asset_name)
        return {"status": "ignored", "reason": "metadata_asset"}

    download_url = _extract_download_url(asset, data)
    if not download_url:
        log.warning("Could not extract download URL from webhook")
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No download URL")

    repository_name = data.get("repositoryName", asset.get("repositoryName", ""))
    format_ = asset.get("format", "pypi")
    # Fall back to the download URL when the payload carries no asset path.
    asset_path = asset.get("path", download_url)

    log.info(
        "Webhook: %s %s in %s (%s)",
        action,
        asset_name,
        repository_name,
        format_,
    )

    # The scan runs after the response is sent, so the sender is not kept waiting.
    background_tasks.add_task(
        _scan_in_background, download_url, repository_name, format_, asset_path
    )
    return {"status": "accepted", "package": asset_name, "action": action}
def _extract_download_url(asset: dict, full_payload: dict) -> str | None:
for key in ("downloadUrl", "download_url", "url"):
val = asset.get(key)
if val:
return val
return full_payload.get("downloadUrl") or full_payload.get("download_url")
async def _scan_in_background(
    download_url: str,
    repository: str,
    format_: str,
    asset_path: str,
):
    """Run ``harvest`` for one asset with a session from ``get_session``.

    Intended to be scheduled as a background task. Any failure is logged
    together with its traceback and is not re-raised.

    Args:
        download_url: URL the asset is fetched from.
        repository: Name of the repository the asset belongs to.
        format_: Repository format reported by the webhook.
        asset_path: Path of the asset inside the repository.
    """
    # Local import keeps this block self-contained; ``aclosing`` needs Python 3.10+,
    # which matches the project's ``target-version``.
    from contextlib import aclosing

    try:
        # Leaving ``async for`` via ``break`` (or an exception) does not finalise
        # the generator, so its session cleanup would be deferred to garbage
        # collection; ``aclosing`` runs it deterministically.
        # NOTE(review): assumes get_session() returns an async generator — confirm.
        async with aclosing(get_session()) as sessions:
            async for session in sessions:
                await harvest(download_url, repository, format_, asset_path, session)
                break
    except Exception as e:
        # ``exception`` records the traceback, which ``error`` dropped.
        log.exception("Background scan failed: %s", e)

View File

@@ -37,6 +37,10 @@ target-version = "py310"
line-length = 100
select = ["E", "F", "I", "W"]
[tool.ruff.lint]
select = ["E", "F", "I", "W"]
ignore = ["E712"]
[tool.ruff.lint.isort]
known-first-party = ["guarddog_nexus"]

73
scripts/setup-nexus.sh Normal file
View File

@@ -0,0 +1,73 @@
#!/bin/sh
# Setup script for test Nexus instance.
# Creates a PyPI proxy repo and a webhook pointing to guarddog-nexus.
#
# Environment variables (all optional):
#   NEXUS_URL       base URL of the Nexus instance
#   ADMIN_PASSWORD  password the admin account is set to on first run
#   WEBHOOK_URL     endpoint registered as the webhook target

NEXUS_URL="${NEXUS_URL:-http://nexus:8081}"
ADMIN_PASSWORD="${ADMIN_PASSWORD:-admin123}"
WEBHOOK_URL="${WEBHOOK_URL:-http://guarddog-nexus:8080/webhooks/nexus}"

# Every step below relies on curl, which minimal images such as Alpine do not
# ship. Without it the readiness probe fails silently until the timeout.
if ! command -v curl >/dev/null 2>&1; then
    echo "curl not found, trying to install it..."
    if ! command -v apk >/dev/null 2>&1 || ! apk add --no-cache curl >/dev/null; then
        echo "curl is required but could not be installed" >&2
        exit 1
    fi
fi

echo "Waiting for Nexus to start..."

# Wait until Nexus REST API is available (up to 5 minutes)
MAX_WAIT=300
ELAPSED=0
while [ "$ELAPSED" -lt "$MAX_WAIT" ]; do
    if curl -sf -o /dev/null "${NEXUS_URL}/service/rest/v1/status" 2>/dev/null; then
        echo "Nexus is up!"
        break
    fi
    sleep 5
    ELAPSED=$((ELAPSED + 5))
done

# The loop only reaches MAX_WAIT when no probe succeeded.
if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
    echo "Timed out waiting for Nexus"
    exit 1
fi

# Check if password needs changing (first run): the generated password file is
# present, so it has to be used for authentication until the password is changed.
ADMIN_PASSWORD_FILE="/nexus-data/admin.password"
FIRST_RUN=0
if [ -f "$ADMIN_PASSWORD_FILE" ]; then
    FIRST_RUN=1
    RANDOM_PASS=$(cat "$ADMIN_PASSWORD_FILE")
    # The password itself is deliberately not printed to the logs.
    echo "Using generated admin password from ${ADMIN_PASSWORD_FILE}"
    AUTH_PASS="$RANDOM_PASS"
else
    AUTH_PASS="$ADMIN_PASSWORD"
fi

echo "Creating PyPI proxy repository..."
curl -sf -u "admin:${AUTH_PASS}" \
    -H "Content-Type: application/json" \
    -d '{
"name": "pypi-proxy",
"online": true,
"storage": {"blobStoreName": "default", "strictContentTypeValidation": true},
"proxy": {"remoteUrl": "https://pypi.org", "contentMaxAge": 1440},
"format": "pypi"
}' \
    "${NEXUS_URL}/service/rest/v1/repositories/pypi/proxy" || echo "Repo may already exist"

# NOTE(review): confirm that this REST endpoint exists in the Nexus version in
# use; a failure here is only reported as "may already exist".
echo "Creating webhook..."
curl -sf -u "admin:${AUTH_PASS}" \
    -H "Content-Type: application/json" \
    -d "{
\"name\": \"guarddog-scan\",
\"eventTypes\": [\"repository.component\", \"repository.asset\"],
\"format\": \"pypi\",
\"url\": \"${WEBHOOK_URL}\",
\"secret\": \"\",
\"enabled\": true
}" \
    "${NEXUS_URL}/service/rest/v1/webhooks" || echo "Webhook may already exist"

# Change admin password if this was first run
if [ "$FIRST_RUN" -eq 1 ]; then
    echo "Changing admin password..."
    # --data-raw sends the value verbatim; with -d a password starting with
    # "@" would be interpreted as a file name.
    if ! curl -sf -u "admin:${RANDOM_PASS}" \
        -H "Content-Type: text/plain" \
        -X PUT \
        --data-raw "${ADMIN_PASSWORD}" \
        "${NEXUS_URL}/service/rest/v1/security/users/admin/change-password"; then
        # Fail loudly: otherwise the script would report success while the
        # admin account still has the generated password.
        echo "Failed to change admin password" >&2
        exit 1
    fi
fi

echo "Nexus setup complete."

0
tests/__init__.py Normal file
View File

131
tests/conftest.py Normal file
View File

@@ -0,0 +1,131 @@
"""Test fixtures for guarddog-nexus."""
import os
import sys
import pytest
import pytest_asyncio
from httpx import ASGITransport, AsyncClient
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
# Put the repository root (the parent of this tests directory) first on the import path.
_REPO_ROOT = os.path.join(os.path.dirname(__file__), "..")
sys.path.insert(0, _REPO_ROOT)

# Test configuration; set before the guarddog_nexus imports below
# (presumably because the package reads it at import time — confirm).
os.environ.update(
    {
        "DATABASE_PATH": ":memory:",
        "NEXUS_URL": "http://nexus:8081",
        "NEXUS_USERNAME": "admin",
        "NEXUS_PASSWORD": "admin123",
        "LOG_SYSLOG_HOST": "",
        "TEMP_DIR": "/tmp/guarddog-nexus-test",
    }
)
from guarddog_nexus.database import Base, get_session # noqa: E402
from guarddog_nexus.main import app # noqa: E402
@pytest_asyncio.fixture
async def db_engine():
    """Yield an async SQLite engine whose schema exists for the duration of a test.

    All tables of ``Base.metadata`` are created before the engine is handed
    out; afterwards they are dropped and the engine is disposed.
    """
    # Named in-memory database opened in shared-cache mode.
    url = "sqlite+aiosqlite:///file:guarddog_test?mode=memory&cache=shared&uri=true"
    test_engine = create_async_engine(url)

    async with test_engine.begin() as connection:
        await connection.run_sync(Base.metadata.create_all)

    yield test_engine

    async with test_engine.begin() as connection:
        await connection.run_sync(Base.metadata.drop_all)
    await test_engine.dispose()
@pytest_asyncio.fixture
async def db_session(db_engine):
    """Yield a single ``AsyncSession`` bound to the test engine."""
    session_factory = async_sessionmaker(
        db_engine,
        class_=AsyncSession,
        expire_on_commit=False,
    )
    async with session_factory() as db:
        yield db
@pytest_asyncio.fixture
async def client(db_engine):
    """Yield an HTTP client that talks to the app in-process.

    The app's ``get_session`` dependency is overridden so that request
    handlers use sessions bound to the test engine.
    """
    maker = async_sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)

    async def override_get_session():
        async with maker() as session:
            yield session

    app.dependency_overrides[get_session] = override_get_session
    # try/finally guarantees the override is removed even when creating or
    # closing the client raises, so it cannot leak into later tests.
    try:
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as ac:
            yield ac
    finally:
        app.dependency_overrides.clear()
@pytest.fixture
def sample_nexus_webhook():
    """Return a webhook payload for a CREATED ``requests`` sdist in ``pypi-proxy``."""
    asset = {
        "name": "requests-2.31.0.tar.gz",
        "format": "pypi",
        "path": "packages/requests/2.31.0/requests-2.31.0.tar.gz",
        "downloadUrl": "http://nexus:8081/repository/pypi-proxy/packages/requests/2.31.0/requests-2.31.0.tar.gz",
    }
    payload = {
        "timestamp": "2026-05-09T12:00:00.000+00:00",
        "nodeId": "test-node",
        "initiator": "admin",
        "action": "CREATED",
        "repositoryName": "pypi-proxy",
    }
    payload["asset"] = asset
    return payload
@pytest.fixture
def guarddog_output_clean():
    """Return raw scanner output that contains neither results nor errors."""
    return dict(results=[], errors=[])
@pytest.fixture
def guarddog_output_flagged():
    """Return raw scanner output with one WARNING and one ERROR result."""
    shady_links = {
        "rule": "shady-links",
        "severity": "WARNING",
        "message": "Package contains URL to suspicious domain",
        "location": "setup.py:15",
    }
    exec_base64 = {
        "rule": "exec-base64",
        "severity": "ERROR",
        "message": "Base64-encoded code execution detected",
        "location": "core.py:42",
    }
    return {"results": [shady_links, exec_base64], "errors": []}
@pytest.fixture
def guarddog_normalized_flagged():
    """Return normalized scan output (``findings`` key) with two findings."""
    shady_links = {
        "rule": "shady-links",
        "severity": "WARNING",
        "message": "Suspicious URL",
        "location": "setup.py:15",
    }
    exec_base64 = {
        "rule": "exec-base64",
        "severity": "ERROR",
        "message": "Base64 exec",
        "location": "core.py:42",
    }
    return {"findings": [shady_links, exec_base64], "errors": []}
@pytest.fixture
def guarddog_normalized_clean():
    """Return normalized scan output with no findings and no errors."""
    return dict(findings=[], errors=[])

72
tests/test_api.py Normal file
View File

@@ -0,0 +1,72 @@
"""Tests for REST API endpoints."""
import pytest
@pytest.mark.asyncio
async def test_health(client):
    """GET /health answers 200 with status "ok"."""
    response = await client.get("/health")
    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "ok"
@pytest.mark.asyncio
async def test_list_scans_empty(client):
    """With an empty database the scan listing reports a total of 0 and no scans."""
    response = await client.get("/api/v1/scans")
    assert response.status_code == 200
    body = response.json()
    assert body["total"] == 0
    assert len(body["scans"]) == 0
@pytest.mark.asyncio
async def test_scan_stats_empty(client):
    """With an empty database both scan counters in the stats are 0."""
    response = await client.get("/api/v1/scans/stats")
    assert response.status_code == 200
    stats = response.json()
    assert stats["total_scans"] == 0
    assert stats["flagged_scans"] == 0
@pytest.mark.asyncio
async def test_scan_not_found(client):
    """An unknown scan id is answered with 200 and a body containing "detail"."""
    response = await client.get("/api/v1/scans/99999")
    assert response.status_code == 200
    body = response.json()
    assert "detail" in body
@pytest.mark.asyncio
async def test_list_packages_empty(client):
    """With an empty database the package listing reports a total of 0."""
    response = await client.get("/api/v1/packages")
    assert response.status_code == 200
    body = response.json()
    assert body["total"] == 0
@pytest.mark.asyncio
async def test_list_findings_empty(client):
    """With an empty database the findings listing reports a total of 0."""
    response = await client.get("/api/v1/findings")
    assert response.status_code == 200
    body = response.json()
    assert body["total"] == 0
@pytest.mark.asyncio
async def test_web_ui_dashboard(client):
    """The dashboard at "/" renders and contains the product name."""
    response = await client.get("/")
    assert response.status_code == 200
    html = response.text
    assert "GuardDog Nexus" in html
@pytest.mark.asyncio
async def test_web_ui_scans(client):
    """The scans page renders and contains its heading text."""
    response = await client.get("/scans")
    assert response.status_code == 200
    html = response.text
    assert "Scans" in html
@pytest.mark.asyncio
async def test_web_ui_packages(client):
    """The packages page renders and contains its heading text."""
    response = await client.get("/packages")
    assert response.status_code == 200
    html = response.text
    assert "Packages" in html

114
tests/test_harvester.py Normal file
View File

@@ -0,0 +1,114 @@
"""Tests for harvester pipeline."""
from unittest.mock import patch
import pytest
from sqlalchemy import select
from guarddog_nexus.harvester import harvest
from guarddog_nexus.models import Finding
@pytest.mark.asyncio
async def test_harvest_new_package(db_session, guarddog_normalized_flagged):
    """Harvesting a package with findings yields a completed, flagged scan with stored findings."""
    # Download, hashing and scanning are replaced so no network or scanner is needed.
    with (
        patch(
            "guarddog_nexus.harvester.download_asset",
            return_value="/tmp/test-package.tar.gz",
        ),
        patch("guarddog_nexus.harvester.compute_sha256", return_value="abc123"),
        patch(
            "guarddog_nexus.harvester.scan_package",
            return_value=guarddog_normalized_flagged,
        ),
    ):
        scan = await harvest(
            download_url="http://nexus:8081/repository/pypi-proxy/packages/requests/2.31.0/requests-2.31.0.tar.gz",
            repository="pypi-proxy",
            format_="pypi",
            asset_path="packages/requests/2.31.0/requests-2.31.0.tar.gz",
            session=db_session,
        )

    assert scan is not None
    assert scan.package_name == "requests"
    assert scan.package_version == "2.31.0"
    assert scan.ecosystem == "pypi"
    assert scan.status == "completed"
    assert scan.flagged is True
    assert scan.total_findings == 2
    assert scan.sha256 == "abc123"

    # Both findings must be stored and linked to the scan.
    rows = await db_session.execute(select(Finding).where(Finding.scan_id == scan.id))
    findings = rows.scalars().all()
    assert len(findings) == 2
@pytest.mark.asyncio
async def test_harvest_skips_duplicate(db_session, guarddog_normalized_flagged):
    """Harvesting the same asset twice returns a scan the first time and None the second."""
    harvest_args = (
        "http://nexus:8081/repo/pypi-proxy/packages/x/1.0/x-1.0.tar.gz",
        "pypi-proxy",
        "pypi",
        "packages/x/1.0/x-1.0.tar.gz",
        db_session,
    )
    with (
        patch("guarddog_nexus.harvester.download_asset", return_value="/tmp/test.tar.gz"),
        patch("guarddog_nexus.harvester.compute_sha256", return_value="abc"),
        patch(
            "guarddog_nexus.harvester.scan_package",
            return_value=guarddog_normalized_flagged,
        ),
    ):
        first = await harvest(*harvest_args)
        second = await harvest(*harvest_args)

    assert first is not None
    assert second is None  # skipped duplicate
@pytest.mark.asyncio
async def test_harvest_clean_package(db_session, guarddog_normalized_clean):
    """A package without findings is recorded as an unflagged scan with zero findings."""
    with (
        patch("guarddog_nexus.harvester.download_asset", return_value="/tmp/test.tar.gz"),
        patch("guarddog_nexus.harvester.compute_sha256", return_value="abc"),
        patch(
            "guarddog_nexus.harvester.scan_package",
            return_value=guarddog_normalized_clean,
        ),
    ):
        scan = await harvest(
            "http://nexus:8081/repo/pypi-proxy/packages/django/4.2/django-4.2.tar.gz",
            "pypi-proxy",
            "pypi",
            "packages/django/4.2/django-4.2.tar.gz",
            db_session,
        )

    assert scan is not None
    assert scan.flagged is False
    assert scan.total_findings == 0
@pytest.mark.asyncio
async def test_harvest_download_failure(db_session):
    """When the download yields None the scan is stored as failed with a download error."""
    with patch("guarddog_nexus.harvester.download_asset", return_value=None):
        scan = await harvest(
            "http://nexus:8081/repo/pypi-proxy/packages/fail/1.0/fail-1.0.tar.gz",
            "pypi-proxy",
            "pypi",
            "packages/fail/1.0/fail-1.0.tar.gz",
            db_session,
        )

    assert scan is not None
    assert scan.status == "failed"
    error_text = scan.error_message or ""
    assert "Download failed" in error_text
@pytest.mark.asyncio
async def test_harvest_skips_non_package_asset(db_session):
    """An index page under ``simple/`` is not harvested: the result is None."""
    index_path = "simple/index.html"
    result = await harvest(
        "http://nexus:8081/repo/pypi-proxy/simple/index.html",
        "pypi-proxy",
        "pypi",
        index_path,
        db_session,
    )
    assert result is None

28
tests/test_scanner.py Normal file
View File

@@ -0,0 +1,28 @@
"""Tests for GuardDog scanner integration."""
from guarddog_nexus.scanner import _normalize_output
def test_normalize_clean_output(guarddog_output_clean):
    """Empty scanner output normalizes to empty findings and errors."""
    normalized = _normalize_output(guarddog_output_clean)
    for key in ("findings", "errors"):
        assert len(normalized[key]) == 0
def test_normalize_flagged_output(guarddog_output_flagged):
    """Both results survive normalization, in order, with rule and severity intact."""
    findings = _normalize_output(guarddog_output_flagged)["findings"]
    assert len(findings) == 2

    first, second = findings[0], findings[1]
    assert first["rule"] == "shady-links"
    assert first["severity"] == "WARNING"
    assert second["rule"] == "exec-base64"
    assert second["severity"] == "ERROR"
def test_normalize_issues_format():
    """Output using the ``issues``/``id`` layout is normalized into a finding keyed by ``rule``."""
    issue = {"id": "test-rule", "severity": "ERROR", "description": "Bad"}
    raw_output = {"issues": [issue], "errors": []}

    findings = _normalize_output(raw_output)["findings"]

    assert len(findings) == 1
    assert findings[0]["rule"] == "test-rule"

72
tests/test_webhooks.py Normal file
View File

@@ -0,0 +1,72 @@
"""Tests for Nexus webhook receiver."""
from unittest.mock import patch
import pytest
@pytest.mark.asyncio
async def test_webhook_rejects_invalid_json(client):
    """A body that is not JSON is answered with 400."""
    json_headers = {"Content-Type": "application/json"}
    response = await client.post("/webhooks/nexus", content="not json", headers=json_headers)
    assert response.status_code == 400
@pytest.mark.asyncio
async def test_webhook_ignores_deleted_action(client, sample_nexus_webhook):
    """A DELETED event is acknowledged with 200 and status "ignored"."""
    sample_nexus_webhook["action"] = "DELETED"
    response = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "ignored"
@pytest.mark.asyncio
async def test_webhook_accepts_created(client, sample_nexus_webhook):
    """A CREATED event for a package asset is accepted and echoes package and action."""
    # The background scan is replaced so that no real harvest is started.
    with patch("guarddog_nexus.webhooks._scan_in_background"):
        response = await client.post("/webhooks/nexus", json=sample_nexus_webhook)

    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "accepted"
    assert body["package"] == "requests-2.31.0.tar.gz"
    assert body["action"] == "CREATED"
@pytest.mark.asyncio
async def test_webhook_accepts_updated(client, sample_nexus_webhook):
    """An UPDATED event for a package asset is accepted as well."""
    sample_nexus_webhook["action"] = "UPDATED"
    # The background scan is replaced so that no real harvest is started.
    with patch("guarddog_nexus.webhooks._scan_in_background"):
        response = await client.post("/webhooks/nexus", json=sample_nexus_webhook)

    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "accepted"
@pytest.mark.asyncio
async def test_webhook_skips_metadata_assets(client, sample_nexus_webhook):
    """An asset named like an index page is acknowledged with status "ignored"."""
    sample_nexus_webhook["asset"]["name"] = "index.html"
    response = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "ignored"
@pytest.mark.asyncio
async def test_webhook_missing_asset(client):
    """A relevant action without any asset/component in the payload is rejected with 400."""
    payload_without_asset = {"action": "CREATED", "repositoryName": "test"}
    response = await client.post("/webhooks/nexus", json=payload_without_asset)
    assert response.status_code == 400