"""Nexus webhook receiver — handles component/asset webhooks.""" import hashlib import hmac import json import re from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status from guarddog_nexus.config import config from guarddog_nexus.database import get_session from guarddog_nexus.harvester import harvest from guarddog_nexus.logging_setup import log router = APIRouter(prefix="/webhooks", tags=["webhooks"]) RELEVANT_ACTIONS = {"CREATED", "UPDATED"} EXCLUDE_NAME_PATTERNS = [ re.compile(p) for p in [ r"^simple/", r"\.html$", r"\.json$", r"\.xml$", r"index\.", r"\.rss$", r"\.atom$", ] ] def _should_skip_asset(filename: str) -> bool: for pat in EXCLUDE_NAME_PATTERNS: if pat.search(filename): return True return False @router.post("/nexus") async def nexus_webhook( request: Request, background_tasks: BackgroundTasks, x_nexus_webhook_signature: str | None = Header(None, alias="X-Nexus-Webhook-Signature"), ): payload = await request.body() payload_str = payload.decode("utf-8") if config.webhook_secret: if not x_nexus_webhook_signature: log.warning("Webhook rejected: missing signature header") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature" ) expected = hmac.new(config.webhook_secret.encode(), payload, hashlib.sha256).hexdigest() if not hmac.compare_digest(x_nexus_webhook_signature, expected): log.warning("Webhook rejected: invalid signature") raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature") try: data = json.loads(payload_str) except json.JSONDecodeError: log.warning("Webhook received invalid JSON") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON") action = data.get("action", "").upper() if action not in RELEVANT_ACTIONS: log.debug("Ignoring action: %s", action) return {"status": "ignored", "action": action} asset = data.get("asset") or data.get("component") or data.get("repositoryComponent") if not asset: log.warning("Webhook payload has no asset/component") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No asset in payload") asset_name = asset.get("name", "") if _should_skip_asset(asset_name): log.debug("Skipping metadata asset: %s", asset_name) return {"status": "ignored", "reason": "metadata_asset"} download_url = _extract_download_url(asset, data) if not download_url: log.warning("Could not extract download URL from webhook") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No download URL") repository_name = data.get("repositoryName", asset.get("repositoryName", "")) format_ = asset.get("format", "pypi") asset_path = asset.get("path", download_url) log.info( "Webhook: %s %s in %s (%s)", action, asset_name, repository_name, format_, ) background_tasks.add_task( _scan_in_background, download_url, repository_name, format_, asset_path ) return {"status": "accepted", "package": asset_name, "action": action} def _extract_download_url(asset: dict, full_payload: dict) -> str | None: for key in ("downloadUrl", "download_url", "url"): val = asset.get(key) if val: return val return full_payload.get("downloadUrl") or full_payload.get("download_url") async def _scan_in_background( download_url: str, repository: str, format_: str, asset_path: str, ): try: async for session in get_session(): await harvest(download_url, repository, format_, asset_path, session) break except Exception as e: log.error("Background scan failed: %s", e)