diff --git a/guarddog_nexus/main.py b/guarddog_nexus/main.py index 2ff67b6..3ca45d8 100644 --- a/guarddog_nexus/main.py +++ b/guarddog_nexus/main.py @@ -55,19 +55,26 @@ class LangMiddleware(BaseHTTPMiddleware): async def lifespan(app: FastAPI): await init_db() log.info("%s started on %s:%s", APP_NAME, config.host, config.port) - # Start background lock cleanup tasks - asyncio.create_task(_start_lock_cleanup()) + tasks = [ + asyncio.create_task(_cleanup_url_locks()), + asyncio.create_task(_cleanup_llm_locks()), + ] yield + for t in tasks: + t.cancel() log.info("%s shutting down", APP_NAME) -async def _start_lock_cleanup(): - """Start background tasks for cleanup of unused locks.""" - from guarddog_nexus.core.harvester import _cleanup_url_locks - from guarddog_nexus.routes.web import _cleanup_llm_locks +async def _cleanup_url_locks(): + from guarddog_nexus.core.harvester import _cleanup_url_locks as _fn - asyncio.create_task(_cleanup_url_locks()) - asyncio.create_task(_cleanup_llm_locks()) + await _fn() + + +async def _cleanup_llm_locks(): + from guarddog_nexus.routes.web import _cleanup_llm_locks as _fn + + await _fn() class RequestLoggingMiddleware(BaseHTTPMiddleware): diff --git a/guarddog_nexus/routes/webhooks.py b/guarddog_nexus/routes/webhooks.py index 1bf7982..75eaaca 100644 --- a/guarddog_nexus/routes/webhooks.py +++ b/guarddog_nexus/routes/webhooks.py @@ -2,7 +2,6 @@ import hashlib import hmac -import json import re from urllib.parse import urlencode @@ -22,6 +21,7 @@ from ..constants import ( from ..core.harvester import harvest from ..db.engine import get_session from ..logging_setup import log +from ..schemas import WebhookPayload router = APIRouter(prefix="/webhooks", tags=["webhooks"]) @@ -41,17 +41,25 @@ def _build_download_url(repo: str, asset_path: str) -> str: return f"{base}/repository/{repo}/{asset_path}" -def _extract_asset_path(asset: dict) -> str | None: - for key in ("path", "name"): - val = asset.get(key) - if val: - return val +def _extract_asset_path(asset) -> str | None: + if isinstance(asset, dict): + for key in ("path", "name"): + val = asset.get(key) + if val: + return val + return None + if asset.path: + return asset.path + if asset.name: + return asset.name return None -def _detect_ecosystem(source: dict) -> str | None: - """Detect ecosystem from asset or component format field.""" - fmt = source.get("format", "").lower() +def _detect_ecosystem(source) -> str | None: + if isinstance(source, dict): + fmt = source.get("format", "").lower() + else: + fmt = (source.format or "").lower() if fmt in ("pypi", "pip", "python"): return "pypi" if fmt in ("go", "golang"): @@ -81,17 +89,17 @@ async def nexus_webhook( raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature") try: - data = json.loads(payload.decode("utf-8")) - except (json.JSONDecodeError, UnicodeDecodeError): + data = WebhookPayload.model_validate_json(payload.decode("utf-8")) + except Exception: log.warning("Webhook received invalid body") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body") - action = data.get("action", "").upper() + action = data.action.upper() if action not in RELEVANT_WEBHOOK_ACTIONS: return {"status": WEBHOOK_STATUS_IGNORED, "action": action} # Nexus sends initiator as "username/IP" — parse both fields - raw_initiator = data.get("initiator", "") + raw_initiator = data.initiator or "" initiator = None source_ip = None if raw_initiator and "/" in raw_initiator: @@ -104,21 +112,21 @@ async def nexus_webhook( log.info("Webhook: action=%s initiator=%s source_ip=%s", action, initiator, source_ip) - repository = data.get("repositoryName", "") + repository = data.repositoryName if not repository: log.warning("Webhook rejected: missing repositoryName") raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="Missing repository name" ) - asset = data.get("asset") - component = data.get("component") + asset = data.asset + component = data.component if asset: asset_path = _extract_asset_path(asset) if not asset_path or not _is_package_asset(asset_path): return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE} - download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path) + download_url = asset.downloadUrl or _build_download_url(repository, asset_path) ecosystem = _detect_ecosystem(asset) if ecosystem is None: return {"status": WEBHOOK_STATUS_IGNORED, "reason": "unknown_ecosystem"} @@ -137,8 +145,8 @@ async def nexus_webhook( return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action} if component: - name = component.get("name", "") - version = component.get("version", "") + name = component.name + version = component.version if not name or not version: return { "status": WEBHOOK_STATUS_IGNORED, diff --git a/guarddog_nexus/schemas.py b/guarddog_nexus/schemas.py index e264787..0f2d32d 100644 --- a/guarddog_nexus/schemas.py +++ b/guarddog_nexus/schemas.py @@ -102,6 +102,30 @@ class StatsResponse(BaseModel): latest_scan_at: datetime | None = None +# Webhook payload models +class WebhookAsset(BaseModel): + id: str | None = None + format: str = "" + path: str | None = None + name: str | None = None + downloadUrl: str | None = None + + +class WebhookComponent(BaseModel): + id: str | None = None + format: str = "" + name: str = "" + version: str = "" + + +class WebhookPayload(BaseModel): + action: str = "" + repositoryName: str = "" + initiator: str | None = None + asset: WebhookAsset | None = None + component: WebhookComponent | None = None + + # Finding data known fields (prevents **f.data from overwriting id/scan_id) _FINDING_DATA_FIELDS = ("rule", "severity", "message", "location", "code")