fix: real nexus webhook format, atomic dedup, tested live
This commit is contained in:
@@ -16,25 +16,41 @@ router = APIRouter(prefix="/webhooks", tags=["webhooks"])
|
||||
|
||||
RELEVANT_ACTIONS = {"CREATED", "UPDATED"}
|
||||
|
||||
# Asset names matched by any of these patterns are treated as repository
# metadata by _should_skip_asset.
EXCLUDE_NAME_PATTERNS = [
    re.compile(p)
    for p in [
        r"^simple/",
        r"\.html$",
        r"\.json$",
        r"\.xml$",
        r"index\.",
        r"\.rss$",
        r"\.atom$",
    ]
]

# Asset paths matched by any of these patterns are rejected by
# _is_package_asset.  Unlike EXCLUDE_NAME_PATTERNS, the "simple/" and
# "index." patterns also accept an optional leading slash.
METADATA_PATTERNS = [
    re.compile(p)
    for p in [
        r"^/?simple/",
        r"\.html$",
        r"\.json$",
        r"\.xml$",
        r"/?index\.",
        r"\.rss$",
        r"\.atom$",
    ]
]
|
||||
|
||||
PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")
|
||||
|
||||
def _should_skip_asset(filename: str) -> bool:
    """Return True when *filename* matches any pattern in EXCLUDE_NAME_PATTERNS."""
    return any(pattern.search(filename) for pattern in EXCLUDE_NAME_PATTERNS)
|
||||
|
||||
def _is_package_asset(name: str) -> bool:
    """Return True when *name* is a package file rather than metadata.

    A name is rejected if any pattern in METADATA_PATTERNS matches it;
    otherwise it is accepted only when it ends with one of
    PACKAGE_EXTENSIONS.
    """
    matches_metadata = any(pattern.search(name) for pattern in METADATA_PATTERNS)
    return not matches_metadata and name.endswith(PACKAGE_EXTENSIONS)
|
||||
|
||||
|
||||
def _build_download_url(repo: str, asset_path: str) -> str:
    """Build ``<nexus_url>/repository/<repo>/<asset_path>``.

    Trailing slashes on ``config.nexus_url`` and leading slashes on
    *asset_path* are stripped so the joined URL has single separators.
    """
    root = config.nexus_url.rstrip("/")
    relative_path = asset_path.lstrip("/")
    return f"{root}/repository/{repo}/{relative_path}"
|
||||
|
||||
|
||||
def _extract_asset_path(asset: dict) -> str | None:
|
||||
for key in ("path", "name"):
|
||||
val = asset.get(key)
|
||||
if val:
|
||||
return val
|
||||
return None
|
||||
|
||||
|
||||
@router.post("/nexus")
|
||||
@@ -65,49 +81,76 @@ async def nexus_webhook(
|
||||
|
||||
action = data.get("action", "").upper()
|
||||
if action not in RELEVANT_ACTIONS:
|
||||
log.debug("Ignoring action: %s", action)
|
||||
return {"status": "ignored", "action": action}
|
||||
|
||||
asset = data.get("asset") or data.get("component") or data.get("repositoryComponent")
|
||||
if not asset:
|
||||
log.warning("Webhook payload has no asset/component")
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No asset in payload")
|
||||
repository = data.get("repositoryName", "")
|
||||
|
||||
asset_name = asset.get("name", "")
|
||||
if _should_skip_asset(asset_name):
|
||||
log.debug("Skipping metadata asset: %s", asset_name)
|
||||
return {"status": "ignored", "reason": "metadata_asset"}
|
||||
asset = data.get("asset")
|
||||
component = data.get("component")
|
||||
|
||||
download_url = _extract_download_url(asset, data)
|
||||
if not download_url:
|
||||
log.warning("Could not extract download URL from webhook")
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No download URL")
|
||||
if asset:
|
||||
asset_path = _extract_asset_path(asset)
|
||||
if not asset_path or not _is_package_asset(asset_path):
|
||||
return {"status": "ignored", "reason": "non_package_asset"}
|
||||
|
||||
repository_name = data.get("repositoryName", asset.get("repositoryName", ""))
|
||||
format_ = asset.get("format", "pypi")
|
||||
asset_path = asset.get("path", download_url)
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
|
||||
|
||||
log.info(
|
||||
"Webhook: %s %s in %s (%s)",
|
||||
action,
|
||||
asset_name,
|
||||
repository_name,
|
||||
format_,
|
||||
log.info("Webhook: %s asset %s in %s", action, asset_path, repository)
|
||||
|
||||
background_tasks.add_task(_scan_in_background, download_url, repository, "pypi", asset_path)
|
||||
return {"status": "accepted", "asset": asset_path, "action": action}
|
||||
|
||||
if component:
|
||||
name = component.get("name", "")
|
||||
version = component.get("version", "")
|
||||
if not name or not version:
|
||||
return {"status": "ignored", "reason": "no_name_or_version"}
|
||||
|
||||
# For component events, look up assets via Nexus REST API
|
||||
background_tasks.add_task(_scan_component, repository, name, version)
|
||||
return {"status": "accepted", "component": f"{name}=={version}", "action": action}
|
||||
|
||||
return {"status": "ignored", "reason": "no_asset_or_component"}
|
||||
|
||||
|
||||
async def _scan_component(repository: str, name: str, version: str):
    """Look up component assets via Nexus API, then scan each package file.

    Queries ``/service/rest/v1/search`` (format ``pypi``) for the given
    repository, name and version, then calls ``harvest`` for every returned
    asset that ``_is_package_asset`` accepts.  Lookup failures (curl error,
    timeout, unparsable or empty response) are logged as warnings and end
    the scan without raising.

    Args:
        repository: Nexus repository name to search in.
        name: Component name taken from the webhook payload.
        version: Component version taken from the webhook payload.
    """
    import asyncio
    import subprocess
    from urllib.parse import urlencode

    # name/version originate from the webhook payload: encode them so that
    # characters such as "+", "&" or spaces cannot corrupt the query string.
    query = urlencode(
        {
            "repository": repository,
            "name": name,
            "version": version,
            "format": "pypi",
        }
    )
    api_url = f"{config.nexus_url.rstrip('/')}/service/rest/v1/search?{query}"

    try:
        # subprocess.run blocks for up to 30 s; run it in a worker thread so
        # the event loop keeps serving other requests meanwhile.
        # NOTE(review): "-u user:password" is visible in the process list;
        # consider passing credentials through a curl config on stdin.
        result = await asyncio.to_thread(
            subprocess.run,
            # -S keeps curl's error message on stderr despite -s, so the
            # warning below actually carries the failure reason.
            ["curl", "-sSf", "-u", f"{config.nexus_username}:{config.nexus_password}", api_url],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # curl missing / not executable, or the 30 s timeout expired.
        log.warning("Component lookup error for %s==%s: %s", name, version, e)
        return

    if result.returncode != 0:
        log.warning("Component lookup failed for %s==%s: %s", name, version, result.stderr)
        return

    try:
        data = json.loads(result.stdout)
    except ValueError as e:
        log.warning("Component lookup error for %s==%s: %s", name, version, e)
        return

    # Tolerate a response that is not a JSON object or has a null "items".
    items = data.get("items") if isinstance(data, dict) else None
    if not items:
        log.warning("No items found in search for %s==%s", name, version)
        return

    for item in items:
        # "assets" may be absent or null for a component without files.
        for asset in item.get("assets") or []:
            asset_path = _extract_asset_path(asset)
            if not asset_path or not _is_package_asset(asset_path):
                continue
            download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
            log.info("Scanning component asset: %s", asset_path)
            # Use only the first session yielded by get_session().
            async for session in get_session():
                await harvest(download_url, repository, "pypi", asset_path, session)
                break


def _extract_download_url(asset: dict, full_payload: dict) -> str | None:
    """Return the first non-empty download URL found in the webhook data.

    Checks ``downloadUrl``, ``download_url`` and ``url`` on *asset*, then
    ``downloadUrl`` and ``download_url`` on *full_payload*.
    """
    for key in ("downloadUrl", "download_url", "url"):
        val = asset.get(key)
        if val:
            return val
    return full_payload.get("downloadUrl") or full_payload.get("download_url")
|
||||
|
||||
|
||||
async def _scan_in_background(
|
||||
|
||||
Reference in New Issue
Block a user