feat: guarddog-nexus — webhook-based PyPI scanner with web UI
This commit is contained in:
125
guarddog_nexus/webhooks.py
Normal file
125
guarddog_nexus/webhooks.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""Nexus webhook receiver — handles component/asset webhooks."""
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import re
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.harvester import harvest
|
||||
from guarddog_nexus.logging_setup import log
|
||||
|
||||
# Router collecting the webhook endpoints; routes declared on it are served
# under the "/webhooks" prefix and tagged "webhooks".
router = APIRouter(prefix="/webhooks", tags=["webhooks"])
|
||||
|
||||
# Webhook actions that lead to a scan; any other action is acknowledged
# and ignored by the receiver.
RELEVANT_ACTIONS = {"CREATED", "UPDATED"}

# Compiled regexes matched (with ``search``) against an asset name; a hit
# means the asset is treated as repository metadata and is not scanned.
EXCLUDE_NAME_PATTERNS = list(
    map(
        re.compile,
        (
            r"^simple/",
            r"\.html$",
            r"\.json$",
            r"\.xml$",
            r"index\.",
            r"\.rss$",
            r"\.atom$",
        ),
    )
)
|
||||
|
||||
|
||||
def _should_skip_asset(filename: str) -> bool:
    """Return True when *filename* matches any pattern in EXCLUDE_NAME_PATTERNS."""
    return any(pattern.search(filename) for pattern in EXCLUDE_NAME_PATTERNS)
|
||||
|
||||
|
||||
def _signature_matches(secret: str, payload: bytes, signature: str) -> bool:
    """Return True if *signature* is a hex HMAC of *payload* keyed by *secret*.

    HMAC-SHA256 (the digest this receiver has always accepted) and HMAC-SHA1
    are both tried.
    NOTE(review): Sonatype's webhook documentation describes the
    X-Nexus-Webhook-Signature header as an HMAC-SHA1 digest; confirm against
    the Nexus version in use and drop whichever algorithm is not needed.
    """
    key = secret.encode()
    # Compare bytes, not str: hmac.compare_digest raises TypeError for str
    # arguments containing non-ASCII characters, and the header value is
    # chosen by the sender.
    provided = signature.encode("utf-8")
    matched = False
    for digestmod in (hashlib.sha256, hashlib.sha1):
        expected = hmac.new(key, payload, digestmod).hexdigest().encode("ascii")
        # Evaluate every candidate so the work done does not depend on
        # which algorithm (if any) matched.
        matched = hmac.compare_digest(provided, expected) or matched
    return matched


@router.post("/nexus")
async def nexus_webhook(
    request: Request,
    background_tasks: BackgroundTasks,
    x_nexus_webhook_signature: str | None = Header(None, alias="X-Nexus-Webhook-Signature"),
):
    """Receive a Nexus webhook and queue a background scan for its asset.

    When ``config.webhook_secret`` is set, the raw request body must carry a
    matching HMAC signature in the ``X-Nexus-Webhook-Signature`` header. The
    body is then parsed as JSON; payloads whose action is not in
    ``RELEVANT_ACTIONS`` or whose asset name matches an exclusion pattern are
    acknowledged but not scanned.

    Returns:
        ``{"status": "accepted", "package": ..., "action": ...}`` when a scan
        was queued, or a ``{"status": "ignored", ...}`` dict otherwise.

    Raises:
        HTTPException: 401 if a signature is required but missing, 403 if it
            does not match, 400 if the body is not a UTF-8 JSON object or
            lacks an asset or a download URL.
    """
    payload = await request.body()

    # Authenticate the raw bytes before interpreting them in any way.
    if config.webhook_secret:
        if not x_nexus_webhook_signature:
            log.warning("Webhook rejected: missing signature header")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature"
            )
        if not _signature_matches(config.webhook_secret, payload, x_nexus_webhook_signature):
            log.warning("Webhook rejected: invalid signature")
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature")

    try:
        # Decoding lives inside the try so a non-UTF-8 body is reported as a
        # client error instead of escaping as an unhandled exception.
        data = json.loads(payload.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        log.warning("Webhook received invalid JSON")
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON")

    # Valid JSON may still be a list, string or number; everything below
    # needs a mapping.
    if not isinstance(data, dict):
        log.warning("Webhook payload is not a JSON object")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Payload must be a JSON object"
        )

    # Tolerate a null or non-string action rather than failing on .upper().
    action = str(data.get("action") or "").upper()
    if action not in RELEVANT_ACTIONS:
        log.debug("Ignoring action: %s", action)
        return {"status": "ignored", "action": action}

    asset = data.get("asset") or data.get("component") or data.get("repositoryComponent")
    if not asset or not isinstance(asset, dict):
        log.warning("Webhook payload has no asset/component")
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No asset in payload")

    # A null or non-string name is treated as an empty name so the regex
    # filter never receives a non-string.
    raw_name = asset.get("name")
    asset_name = raw_name if isinstance(raw_name, str) else ""
    if _should_skip_asset(asset_name):
        log.debug("Skipping metadata asset: %s", asset_name)
        return {"status": "ignored", "reason": "metadata_asset"}

    download_url = _extract_download_url(asset, data)
    if not download_url:
        log.warning("Could not extract download URL from webhook")
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No download URL")

    # ``or`` fallbacks (instead of dict.get defaults) also cover keys that
    # are present but null/empty.
    repository_name = data.get("repositoryName") or asset.get("repositoryName") or ""
    format_ = asset.get("format") or "pypi"
    asset_path = asset.get("path") or download_url

    log.info(
        "Webhook: %s %s in %s (%s)",
        action,
        asset_name,
        repository_name,
        format_,
    )

    # The scan runs after the response is sent so the sender is not blocked.
    background_tasks.add_task(
        _scan_in_background, download_url, repository_name, format_, asset_path
    )

    return {"status": "accepted", "package": asset_name, "action": action}
|
||||
|
||||
|
||||
def _extract_download_url(asset: dict, full_payload: dict) -> str | None:
|
||||
for key in ("downloadUrl", "download_url", "url"):
|
||||
val = asset.get(key)
|
||||
if val:
|
||||
return val
|
||||
return full_payload.get("downloadUrl") or full_payload.get("download_url")
|
||||
|
||||
|
||||
async def _scan_in_background(
    download_url: str,
    repository: str,
    format_: str,
    asset_path: str,
):
    """Harvest one asset using a session obtained from ``get_session``.

    Any exception is logged with its traceback and swallowed, since nothing
    awaits the result of this background task.

    Args:
        download_url: URL of the asset, passed through to ``harvest``.
        repository: Repository name, passed through to ``harvest``.
        format_: Repository format, passed through to ``harvest``.
        asset_path: Asset path, passed through to ``harvest``.
    """
    try:
        sessions = get_session()
        try:
            async for session in sessions:
                await harvest(download_url, repository, format_, asset_path, session)
                # Only the first yielded session is used.
                break
        finally:
            # Leaving ``async for`` via ``break`` does not finalize the
            # iterator; close it explicitly so its cleanup runs now rather
            # than whenever it is garbage-collected.
            aclose = getattr(sessions, "aclose", None)
            if aclose is not None:
                await aclose()
    except Exception as e:
        # log.exception keeps the traceback, which a bare message loses.
        log.exception("Background scan failed: %s", e)
|
||||
Reference in New Issue
Block a user