refactor: Pydantic webhook payload models, lifespan task cancellation, dict/Pydantic compat helpers

This commit is contained in:
Marker689
2026-05-11 22:07:35 +03:00
parent 6e3c2c5caa
commit 3f44de1d98
3 changed files with 66 additions and 27 deletions

View File

@@ -55,19 +55,26 @@ class LangMiddleware(BaseHTTPMiddleware):
async def lifespan(app: FastAPI):
    """Run startup work, hand control to the app, then stop background tasks.

    Before the ``yield``: awaits ``init_db()``, logs the listen address and
    spawns the two lock-cleanup coroutines as background tasks.
    After the ``yield``: cancels those tasks, waits for them to finish
    unwinding, and logs the shutdown.

    NOTE(review): intended as the FastAPI lifespan handler; the decorator and
    registration are outside this view - confirm.
    """
    await init_db()
    log.info("%s started on %s:%s", APP_NAME, config.host, config.port)
    tasks = [
        asyncio.create_task(_cleanup_url_locks()),
        asyncio.create_task(_cleanup_llm_locks()),
    ]
    try:
        yield
    finally:
        # Runs even when the lifespan is torn down by an exception, so the
        # background tasks are never left behind.
        for task in tasks:
            task.cancel()
        # cancel() only *requests* cancellation. Awaiting the tasks lets them
        # unwind before the loop closes; return_exceptions=True collects the
        # resulting CancelledError (or an earlier task failure) instead of
        # re-raising it here.
        await asyncio.gather(*tasks, return_exceptions=True)
        log.info("%s shutting down", APP_NAME)
async def _cleanup_url_locks():
    """Await the harvester module's URL-lock cleanup coroutine."""
    # The import stays inside the function, as in the original; presumably
    # this avoids an import cycle at module load time - confirm.
    from guarddog_nexus.core.harvester import (
        _cleanup_url_locks as harvester_cleanup,
    )

    await harvester_cleanup()
async def _cleanup_llm_locks():
    """Await the web routes module's LLM-lock cleanup coroutine."""
    # The import stays inside the function, as in the original; presumably
    # this avoids an import cycle at module load time - confirm.
    from guarddog_nexus.routes.web import (
        _cleanup_llm_locks as web_cleanup,
    )

    await web_cleanup()
class RequestLoggingMiddleware(BaseHTTPMiddleware): class RequestLoggingMiddleware(BaseHTTPMiddleware):

View File

@@ -2,7 +2,6 @@
import hashlib import hashlib
import hmac import hmac
import json
import re import re
from urllib.parse import urlencode from urllib.parse import urlencode
@@ -22,6 +21,7 @@ from ..constants import (
from ..core.harvester import harvest from ..core.harvester import harvest
from ..db.engine import get_session from ..db.engine import get_session
from ..logging_setup import log from ..logging_setup import log
from ..schemas import WebhookPayload
router = APIRouter(prefix="/webhooks", tags=["webhooks"]) router = APIRouter(prefix="/webhooks", tags=["webhooks"])
@@ -41,17 +41,25 @@ def _build_download_url(repo: str, asset_path: str) -> str:
return f"{base}/repository/{repo}/{asset_path}" return f"{base}/repository/{repo}/{asset_path}"
def _extract_asset_path(asset: dict) -> str | None: def _extract_asset_path(asset) -> str | None:
if isinstance(asset, dict):
for key in ("path", "name"): for key in ("path", "name"):
val = asset.get(key) val = asset.get(key)
if val: if val:
return val return val
return None return None
if asset.path:
return asset.path
if asset.name:
return asset.name
return None
def _detect_ecosystem(source: dict) -> str | None: def _detect_ecosystem(source) -> str | None:
"""Detect ecosystem from asset or component format field.""" if isinstance(source, dict):
fmt = source.get("format", "").lower() fmt = source.get("format", "").lower()
else:
fmt = (source.format or "").lower()
if fmt in ("pypi", "pip", "python"): if fmt in ("pypi", "pip", "python"):
return "pypi" return "pypi"
if fmt in ("go", "golang"): if fmt in ("go", "golang"):
@@ -81,17 +89,17 @@ async def nexus_webhook(
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature") raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature")
try: try:
data = json.loads(payload.decode("utf-8")) data = WebhookPayload.model_validate_json(payload.decode("utf-8"))
except (json.JSONDecodeError, UnicodeDecodeError): except Exception:
log.warning("Webhook received invalid body") log.warning("Webhook received invalid body")
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body")
action = data.get("action", "").upper() action = data.action.upper()
if action not in RELEVANT_WEBHOOK_ACTIONS: if action not in RELEVANT_WEBHOOK_ACTIONS:
return {"status": WEBHOOK_STATUS_IGNORED, "action": action} return {"status": WEBHOOK_STATUS_IGNORED, "action": action}
# Nexus sends initiator as "username/IP" — parse both fields # Nexus sends initiator as "username/IP" — parse both fields
raw_initiator = data.get("initiator", "") raw_initiator = data.initiator or ""
initiator = None initiator = None
source_ip = None source_ip = None
if raw_initiator and "/" in raw_initiator: if raw_initiator and "/" in raw_initiator:
@@ -104,21 +112,21 @@ async def nexus_webhook(
log.info("Webhook: action=%s initiator=%s source_ip=%s", action, initiator, source_ip) log.info("Webhook: action=%s initiator=%s source_ip=%s", action, initiator, source_ip)
repository = data.get("repositoryName", "") repository = data.repositoryName
if not repository: if not repository:
log.warning("Webhook rejected: missing repositoryName") log.warning("Webhook rejected: missing repositoryName")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, detail="Missing repository name" status_code=status.HTTP_400_BAD_REQUEST, detail="Missing repository name"
) )
asset = data.get("asset") asset = data.asset
component = data.get("component") component = data.component
if asset: if asset:
asset_path = _extract_asset_path(asset) asset_path = _extract_asset_path(asset)
if not asset_path or not _is_package_asset(asset_path): if not asset_path or not _is_package_asset(asset_path):
return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE} return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE}
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path) download_url = asset.downloadUrl or _build_download_url(repository, asset_path)
ecosystem = _detect_ecosystem(asset) ecosystem = _detect_ecosystem(asset)
if ecosystem is None: if ecosystem is None:
return {"status": WEBHOOK_STATUS_IGNORED, "reason": "unknown_ecosystem"} return {"status": WEBHOOK_STATUS_IGNORED, "reason": "unknown_ecosystem"}
@@ -137,8 +145,8 @@ async def nexus_webhook(
return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action} return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}
if component: if component:
name = component.get("name", "") name = component.name
version = component.get("version", "") version = component.version
if not name or not version: if not name or not version:
return { return {
"status": WEBHOOK_STATUS_IGNORED, "status": WEBHOOK_STATUS_IGNORED,

View File

@@ -102,6 +102,30 @@ class StatsResponse(BaseModel):
latest_scan_at: datetime | None = None latest_scan_at: datetime | None = None
# Webhook payload models
class WebhookAsset(BaseModel):
    """Shape of the ``asset`` object inside a webhook payload.

    Every field declares a default, so an asset object that omits any of
    these keys still validates.
    """

    # Asset identifier as sent by the caller; not interpreted here.
    id: str | None = None
    # Package format string; empty string when the key is absent.
    format: str = ""
    # Path of the asset; presumably relative to the repository - confirm.
    path: str | None = None
    # Asset name.
    name: str | None = None
    # Download URL supplied by the sender, if any (camelCase matches the
    # JSON key).
    downloadUrl: str | None = None
class WebhookComponent(BaseModel):
    """Shape of the ``component`` object inside a webhook payload.

    Every field declares a default, so a component object that omits any of
    these keys still validates.
    """

    # Component identifier as sent by the caller; not interpreted here.
    id: str | None = None
    # Package format string; empty string when the key is absent.
    format: str = ""
    # Component (package) name; empty string when the key is absent.
    name: str = ""
    # Component version; empty string when the key is absent.
    version: str = ""
class WebhookPayload(BaseModel):
    """Top-level webhook request body.

    Every field declares a default, so a body that omits any of these keys
    still validates; ``asset`` and ``component`` are nested models and are
    ``None`` when not sent.
    """

    # Event action name; empty string when the key is absent.
    action: str = ""
    # Name of the repository the event refers to (camelCase matches the
    # JSON key); empty string when the key is absent.
    repositoryName: str = ""
    # Who triggered the event; presumably "username/IP" as sent by Nexus -
    # confirm against the sender.
    initiator: str | None = None
    # Asset-level event details, if present.
    asset: WebhookAsset | None = None
    # Component-level event details, if present.
    component: WebhookComponent | None = None
# Finding data known fields (prevents **f.data from overwriting id/scan_id) # Finding data known fields (prevents **f.data from overwriting id/scan_id)
_FINDING_DATA_FIELDS = ("rule", "severity", "message", "location", "code") _FINDING_DATA_FIELDS = ("rule", "severity", "message", "location", "code")