Files
guarddog-nexus/guarddog_nexus/routes/webhooks.py

254 lines
7.9 KiB
Python

"""Nexus webhook receiver — handles component/asset webhooks."""
import hashlib
import hmac
import re
from urllib.parse import urlencode
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
from ..config import config
from ..constants import (
METADATA_PATTERNS,
PACKAGE_EXTENSIONS,
RELEVANT_WEBHOOK_ACTIONS,
WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT,
WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
WEBHOOK_IGNORE_NON_PACKAGE,
WEBHOOK_STATUS_ACCEPTED,
WEBHOOK_STATUS_IGNORED,
)
from ..core.harvester import harvest
from ..db.engine import get_session
from ..logging_setup import log
from ..schemas import WebhookPayload
# Every route declared on this router is served under the /webhooks prefix.
router = APIRouter(prefix="/webhooks", tags=["webhooks"])

# Compile the metadata patterns once at import time so that
# _is_package_asset() does not recompile them on every call.
_METADATA_RE = list(map(re.compile, METADATA_PATTERNS))
def _is_package_asset(name: str) -> bool:
    """Tell whether ``name`` looks like a package file rather than metadata.

    A name is rejected as soon as any compiled pattern in ``_METADATA_RE``
    matches it; otherwise it is accepted only when it ends with one of
    ``PACKAGE_EXTENSIONS``.
    """
    if any(regex.search(name) for regex in _METADATA_RE):
        return False
    return name.endswith(PACKAGE_EXTENSIONS)
def _build_download_url(repo: str, asset_path: str) -> str:
    """Compose ``<nexus_url>/repository/<repo>/<asset_path>``.

    Trailing slashes on ``config.nexus_url`` and leading/trailing slashes on
    ``asset_path`` are dropped so the pieces join with exactly one ``/``.
    """
    nexus_root = config.nexus_url.rstrip("/")
    return f"{nexus_root}/repository/{repo}/{asset_path.strip('/')}"
def _extract_asset_path(asset) -> str | None:
if isinstance(asset, dict):
for key in ("path", "name"):
val = asset.get(key)
if val:
return val
return None
if asset.path:
return asset.path
if asset.name:
return asset.name
return None
def _detect_ecosystem(source) -> str | None:
if isinstance(source, dict):
fmt = source.get("format", "").lower()
else:
fmt = (source.format or "").lower()
if fmt in ("pypi", "pip", "python"):
return "pypi"
if fmt in ("go", "golang"):
return "go"
if fmt in ("npm", "node"):
return "npm"
return None
def _verify_signature(payload: bytes, signature: str | None) -> None:
    """Reject the request unless ``signature`` is the expected HMAC of ``payload``.

    Verification is skipped entirely when ``config.webhook_secret`` is falsy.

    Raises:
        HTTPException: 401 when the header is absent, 403 when it does not
            match the hex HMAC-SHA256 of the raw body.
    """
    if not config.webhook_secret:
        return
    if not signature:
        log.warning("Webhook rejected: missing signature header")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature"
        )
    # NOTE(review): Sonatype's webhook documentation describes
    # X-Nexus-Webhook-Signature as an HMAC-SHA1 hex digest — confirm that
    # SHA-256 matches what the sending Nexus instance actually produces.
    expected = hmac.new(config.webhook_secret.encode(), payload, hashlib.sha256).hexdigest()
    # Compare as bytes: compare_digest() raises TypeError when given a str
    # containing non-ASCII characters, which would turn a malformed header
    # into a 500 instead of a clean 403.
    if not hmac.compare_digest(signature.encode("utf-8"), expected.encode("utf-8")):
        log.warning("Webhook rejected: invalid signature")
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature")


def _parse_initiator(raw_initiator: str, request: Request) -> tuple[str | None, str | None]:
    """Split the webhook initiator into ``(initiator, source_ip)``.

    Nexus sends initiator as "username/IP". When there is no ``/`` the whole
    value is the initiator and the IP falls back to the HTTP client address
    (if known). An empty value yields ``(None, None)``.
    """
    if not raw_initiator:
        return None, None
    if "/" in raw_initiator:
        # rsplit: only the last "/" separates the IP from the username.
        user, ip = raw_initiator.rsplit("/", 1)
        return user, ip
    return raw_initiator, (request.client.host if request.client else None)


@router.post("/nexus")
async def nexus_webhook(
    request: Request,
    background_tasks: BackgroundTasks,
    x_nexus_webhook_signature: str | None = Header(None, alias="X-Nexus-Webhook-Signature"),
) -> dict:
    """Receive a Nexus webhook and queue a background scan for package uploads.

    Flow: verify the signature (when a secret is configured), parse the body
    into ``WebhookPayload``, ignore irrelevant actions, then queue either an
    asset scan or a component scan as a background task.

    Returns:
        A dict whose ``status`` is ``WEBHOOK_STATUS_ACCEPTED`` when a scan was
        queued, or ``WEBHOOK_STATUS_IGNORED`` (usually with a ``reason``, or
        the ``action`` for irrelevant actions) when nothing was queued.

    Raises:
        HTTPException: 401/403 on signature problems, 400 when the body cannot
            be parsed or ``repositoryName`` is missing.
    """
    payload = await request.body()
    _verify_signature(payload, x_nexus_webhook_signature)

    try:
        data = WebhookPayload.model_validate_json(payload.decode("utf-8"))
    except Exception as exc:
        log.warning("Webhook received invalid body")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body"
        ) from exc

    action = data.action.upper()
    if action not in RELEVANT_WEBHOOK_ACTIONS:
        return {"status": WEBHOOK_STATUS_IGNORED, "action": action}

    initiator, source_ip = _parse_initiator(data.initiator or "", request)
    log.info("Webhook: action=%s initiator=%s source_ip=%s", action, initiator, source_ip)

    repository = data.repositoryName
    if not repository:
        log.warning("Webhook rejected: missing repositoryName")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Missing repository name"
        )

    asset = data.asset
    component = data.component

    # An asset event takes precedence over a component event.
    if asset:
        asset_path = _extract_asset_path(asset)
        if not asset_path or not _is_package_asset(asset_path):
            return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE}
        # NOTE(review): downloadUrl is taken from the request body; when no
        # webhook_secret is configured the sender is unauthenticated — confirm
        # that harvest() restricts which hosts it is willing to fetch from.
        download_url = asset.downloadUrl or _build_download_url(repository, asset_path)
        ecosystem = _detect_ecosystem(asset)
        if ecosystem is None:
            return {"status": WEBHOOK_STATUS_IGNORED, "reason": "unknown_ecosystem"}
        log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository)
        background_tasks.add_task(
            _scan_in_background,
            download_url,
            repository,
            ecosystem,
            asset_path,
            initiator=initiator,
            source_ip=source_ip,
        )
        return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}

    if component:
        name = component.name
        version = component.version
        if not name or not version:
            return {
                "status": WEBHOOK_STATUS_IGNORED,
                "reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
            }
        ecosystem = _detect_ecosystem(component)
        if ecosystem is None:
            return {"status": WEBHOOK_STATUS_IGNORED, "reason": "unknown_ecosystem"}
        background_tasks.add_task(
            _scan_component, repository, name, version, ecosystem, initiator, source_ip
        )
        return {
            "status": WEBHOOK_STATUS_ACCEPTED,
            "component": f"{name}=={version}",
            "action": action,
        }

    return {
        "status": WEBHOOK_STATUS_IGNORED,
        "reason": WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT,
    }
async def _scan_component(
    repository: str,
    name: str,
    version: str,
    ecosystem: str,
    initiator: str | None = None,
    source_ip: str | None = None,
):
    """Background task: look a component up in Nexus and harvest its package assets.

    Queries ``/service/rest/v1/search`` for the component, then calls
    ``harvest`` for every returned asset that ``_is_package_asset`` accepts.
    Nothing is raised to the caller: lookup failures are logged as warnings,
    and a failure on one asset is logged without stopping the remaining ones.

    Args:
        repository: Repository name, used for the search and for fallback URLs.
        name: Component name.
        version: Component version.
        ecosystem: Ecosystem name; sent as the search ``format`` and passed to
            ``harvest``.
        initiator: Forwarded to ``harvest`` unchanged.
        source_ip: Forwarded to ``harvest`` unchanged.
    """
    try:
        # Function-scope import; presumably avoids a circular import — verify.
        from ..core.nexus import nexus_get

        query = urlencode(
            {
                "repository": repository,
                "name": name,
                "version": version,
                "format": ecosystem,
            }
        )
        try:
            resp = await nexus_get(f"/service/rest/v1/search?{query}")
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            log.warning("Component lookup error for %s==%s: %s", name, version, e)
            return

        items = data.get("items", [])
        if not items:
            log.warning("No items found in search for %s==%s", name, version)
            return

        for item in items:
            # "assets" may be absent or null; both mean nothing to scan.
            for asset in item.get("assets") or []:
                # Isolate each asset so one malformed entry or failed harvest
                # does not prevent the remaining assets from being scanned.
                try:
                    asset_path = _extract_asset_path(asset)
                    if not asset_path or not _is_package_asset(asset_path):
                        continue
                    download_url = asset.get("downloadUrl") or _build_download_url(
                        repository, asset_path
                    )
                    log.info("Scanning component asset: %s", asset_path)
                    async for session in get_session():
                        await harvest(
                            download_url,
                            repository,
                            ecosystem,
                            asset_path,
                            session,
                            initiator=initiator,
                            source_ip=source_ip,
                        )
                        # Only the first yielded session is used.
                        break
                except Exception as e:
                    log.error(
                        "Component asset scan failed for %s==%s: %s", name, version, e
                    )
    except Exception as e:
        log.error("Component scan failed for %s==%s: %s", name, version, e)
async def _scan_in_background(
    download_url: str,
    repository: str,
    format_: str,
    asset_path: str,
    initiator: str | None = None,
    source_ip: str | None = None,
):
    """Background task: harvest a single asset with a session from ``get_session``.

    All positional arguments plus ``initiator`` / ``source_ip`` are forwarded
    to ``harvest`` unchanged. Any exception is logged and swallowed, so
    nothing propagates out of this task.
    """
    provenance = {"initiator": initiator, "source_ip": source_ip}
    try:
        async for db_session in get_session():
            await harvest(
                download_url, repository, format_, asset_path, db_session, **provenance
            )
            # Only the first yielded session is used.
            break
    except Exception as exc:
        log.error("Background scan failed: %s", exc)