fix: real nexus webhook format, atomic dedup, tested live
This commit is contained in:
@@ -4,7 +4,7 @@ import datetime
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from sqlalchemy import select
|
from sqlalchemy.exc import IntegrityError
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from guarddog_nexus.config import config
|
from guarddog_nexus.config import config
|
||||||
@@ -41,17 +41,6 @@ async def harvest(
|
|||||||
|
|
||||||
package_name, package_version = info
|
package_name, package_version = info
|
||||||
|
|
||||||
existing = await session.scalar(
|
|
||||||
select(Scan.id).where(
|
|
||||||
Scan.package_name == package_name,
|
|
||||||
Scan.package_version == package_version,
|
|
||||||
Scan.repository == repository,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if existing:
|
|
||||||
log.info("Already scanned %s==%s, skipping", package_name, package_version)
|
|
||||||
return None
|
|
||||||
|
|
||||||
scan = Scan(
|
scan = Scan(
|
||||||
package_name=package_name,
|
package_name=package_name,
|
||||||
package_version=package_version,
|
package_version=package_version,
|
||||||
@@ -61,7 +50,13 @@ async def harvest(
|
|||||||
status=ScanStatus.PENDING.value,
|
status=ScanStatus.PENDING.value,
|
||||||
)
|
)
|
||||||
session.add(scan)
|
session.add(scan)
|
||||||
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
except IntegrityError:
|
||||||
|
await session.rollback()
|
||||||
|
log.info("Already scanned %s==%s (unique), skipping", package_name, package_version)
|
||||||
|
return None
|
||||||
|
|
||||||
await session.refresh(scan)
|
await session.refresh(scan)
|
||||||
|
|
||||||
os.makedirs(config.temp_dir, exist_ok=True)
|
os.makedirs(config.temp_dir, exist_ok=True)
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
import datetime
|
import datetime
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, func
|
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, func
|
||||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||||
|
|
||||||
from guarddog_nexus.database import Base
|
from guarddog_nexus.database import Base
|
||||||
@@ -18,6 +18,9 @@ class ScanStatus(str, Enum):
|
|||||||
|
|
||||||
class Scan(Base):
|
class Scan(Base):
|
||||||
__tablename__ = "scans"
|
__tablename__ = "scans"
|
||||||
|
__table_args__ = (
|
||||||
|
UniqueConstraint("package_name", "package_version", "repository", name="uq_scan_pkg"),
|
||||||
|
)
|
||||||
|
|
||||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||||
package_name: Mapped[str] = mapped_column(String(255), nullable=False)
|
package_name: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||||
|
|||||||
@@ -16,25 +16,41 @@ router = APIRouter(prefix="/webhooks", tags=["webhooks"])
|
|||||||
|
|
||||||
RELEVANT_ACTIONS = {"CREATED", "UPDATED"}
|
RELEVANT_ACTIONS = {"CREATED", "UPDATED"}
|
||||||
|
|
||||||
EXCLUDE_NAME_PATTERNS = [
|
METADATA_PATTERNS = [
|
||||||
re.compile(p)
|
re.compile(p)
|
||||||
for p in [
|
for p in [
|
||||||
r"^simple/",
|
r"^/?simple/",
|
||||||
r"\.html$",
|
r"\.html$",
|
||||||
r"\.json$",
|
r"\.json$",
|
||||||
r"\.xml$",
|
r"\.xml$",
|
||||||
r"index\.",
|
r"/?index\.",
|
||||||
r"\.rss$",
|
r"\.rss$",
|
||||||
r"\.atom$",
|
r"\.atom$",
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")
|
||||||
|
|
||||||
def _should_skip_asset(filename: str) -> bool:
|
|
||||||
for pat in EXCLUDE_NAME_PATTERNS:
|
def _is_package_asset(name: str) -> bool:
|
||||||
if pat.search(filename):
|
for pat in METADATA_PATTERNS:
|
||||||
return True
|
if pat.search(name):
|
||||||
return False
|
return False
|
||||||
|
return name.endswith(PACKAGE_EXTENSIONS)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_download_url(repo: str, asset_path: str) -> str:
|
||||||
|
base = config.nexus_url.rstrip("/")
|
||||||
|
asset_path = asset_path.lstrip("/")
|
||||||
|
return f"{base}/repository/{repo}/{asset_path}"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_asset_path(asset: dict) -> str | None:
|
||||||
|
for key in ("path", "name"):
|
||||||
|
val = asset.get(key)
|
||||||
|
if val:
|
||||||
|
return val
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@router.post("/nexus")
|
@router.post("/nexus")
|
||||||
@@ -65,49 +81,76 @@ async def nexus_webhook(
|
|||||||
|
|
||||||
action = data.get("action", "").upper()
|
action = data.get("action", "").upper()
|
||||||
if action not in RELEVANT_ACTIONS:
|
if action not in RELEVANT_ACTIONS:
|
||||||
log.debug("Ignoring action: %s", action)
|
|
||||||
return {"status": "ignored", "action": action}
|
return {"status": "ignored", "action": action}
|
||||||
|
|
||||||
asset = data.get("asset") or data.get("component") or data.get("repositoryComponent")
|
repository = data.get("repositoryName", "")
|
||||||
if not asset:
|
|
||||||
log.warning("Webhook payload has no asset/component")
|
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No asset in payload")
|
|
||||||
|
|
||||||
asset_name = asset.get("name", "")
|
asset = data.get("asset")
|
||||||
if _should_skip_asset(asset_name):
|
component = data.get("component")
|
||||||
log.debug("Skipping metadata asset: %s", asset_name)
|
|
||||||
return {"status": "ignored", "reason": "metadata_asset"}
|
|
||||||
|
|
||||||
download_url = _extract_download_url(asset, data)
|
if asset:
|
||||||
if not download_url:
|
asset_path = _extract_asset_path(asset)
|
||||||
log.warning("Could not extract download URL from webhook")
|
if not asset_path or not _is_package_asset(asset_path):
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No download URL")
|
return {"status": "ignored", "reason": "non_package_asset"}
|
||||||
|
|
||||||
repository_name = data.get("repositoryName", asset.get("repositoryName", ""))
|
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
|
||||||
format_ = asset.get("format", "pypi")
|
|
||||||
asset_path = asset.get("path", download_url)
|
|
||||||
|
|
||||||
log.info(
|
log.info("Webhook: %s asset %s in %s", action, asset_path, repository)
|
||||||
"Webhook: %s %s in %s (%s)",
|
|
||||||
action,
|
background_tasks.add_task(_scan_in_background, download_url, repository, "pypi", asset_path)
|
||||||
asset_name,
|
return {"status": "accepted", "asset": asset_path, "action": action}
|
||||||
repository_name,
|
|
||||||
format_,
|
if component:
|
||||||
|
name = component.get("name", "")
|
||||||
|
version = component.get("version", "")
|
||||||
|
if not name or not version:
|
||||||
|
return {"status": "ignored", "reason": "no_name_or_version"}
|
||||||
|
|
||||||
|
# For component events, look up assets via Nexus REST API
|
||||||
|
background_tasks.add_task(_scan_component, repository, name, version)
|
||||||
|
return {"status": "accepted", "component": f"{name}=={version}", "action": action}
|
||||||
|
|
||||||
|
return {"status": "ignored", "reason": "no_asset_or_component"}
|
||||||
|
|
||||||
|
|
||||||
|
async def _scan_component(repository: str, name: str, version: str):
|
||||||
|
"""Look up component assets via Nexus API, then scan each package file."""
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
api_url = (
|
||||||
|
f"{config.nexus_url.rstrip('/')}/service/rest/v1/search"
|
||||||
|
f"?repository={repository}&name={name}&version={version}&format=pypi"
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
background_tasks.add_task(
|
result = subprocess.run(
|
||||||
_scan_in_background, download_url, repository_name, format_, asset_path
|
["curl", "-sf", "-u", f"{config.nexus_username}:{config.nexus_password}", api_url],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=30,
|
||||||
)
|
)
|
||||||
|
if result.returncode != 0:
|
||||||
|
log.warning("Component lookup failed for %s==%s: %s", name, version, result.stderr)
|
||||||
|
return
|
||||||
|
data = json.loads(result.stdout)
|
||||||
|
except Exception as e:
|
||||||
|
log.warning("Component lookup error for %s==%s: %s", name, version, e)
|
||||||
|
return
|
||||||
|
|
||||||
return {"status": "accepted", "package": asset_name, "action": action}
|
items = data.get("items", [])
|
||||||
|
if not items:
|
||||||
|
log.warning("No items found in search for %s==%s", name, version)
|
||||||
|
return
|
||||||
|
|
||||||
|
for item in items:
|
||||||
def _extract_download_url(asset: dict, full_payload: dict) -> str | None:
|
for asset in item.get("assets", []):
|
||||||
for key in ("downloadUrl", "download_url", "url"):
|
asset_path = _extract_asset_path(asset)
|
||||||
val = asset.get(key)
|
if not asset_path or not _is_package_asset(asset_path):
|
||||||
if val:
|
continue
|
||||||
return val
|
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
|
||||||
return full_payload.get("downloadUrl") or full_payload.get("download_url")
|
log.info("Scanning component asset: %s", asset_path)
|
||||||
|
async for session in get_session():
|
||||||
|
await harvest(download_url, repository, "pypi", asset_path, session)
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
async def _scan_in_background(
|
async def _scan_in_background(
|
||||||
|
|||||||
@@ -67,14 +67,33 @@ def sample_nexus_webhook():
|
|||||||
"action": "CREATED",
|
"action": "CREATED",
|
||||||
"repositoryName": "pypi-proxy",
|
"repositoryName": "pypi-proxy",
|
||||||
"asset": {
|
"asset": {
|
||||||
"name": "requests-2.31.0.tar.gz",
|
"id": "abc123",
|
||||||
|
"assetId": "dGVzdA==",
|
||||||
"format": "pypi",
|
"format": "pypi",
|
||||||
"path": "packages/requests/2.31.0/requests-2.31.0.tar.gz",
|
"name": "/packages/requests/2.31.0/requests-2.31.0.tar.gz",
|
||||||
"downloadUrl": "http://nexus:8081/repository/pypi-proxy/packages/requests/2.31.0/requests-2.31.0.tar.gz",
|
"downloadUrl": "http://nexus:8081/repository/pypi-proxy/packages/requests/2.31.0/requests-2.31.0.tar.gz",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def sample_nexus_component_webhook():
|
||||||
|
return {
|
||||||
|
"timestamp": "2026-05-09T12:00:00.000+00:00",
|
||||||
|
"nodeId": "test-node",
|
||||||
|
"initiator": "admin",
|
||||||
|
"action": "CREATED",
|
||||||
|
"repositoryName": "pypi-proxy",
|
||||||
|
"component": {
|
||||||
|
"id": "comp1",
|
||||||
|
"componentId": "dGVzdDI=",
|
||||||
|
"format": "pypi",
|
||||||
|
"name": "requests",
|
||||||
|
"version": "2.31.0",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def guarddog_output_clean():
|
def guarddog_output_clean():
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -18,55 +18,70 @@ async def test_webhook_rejects_invalid_json(client):
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_webhook_ignores_deleted_action(client, sample_nexus_webhook):
|
async def test_webhook_ignores_deleted_action(client, sample_nexus_webhook):
|
||||||
sample_nexus_webhook["action"] = "DELETED"
|
sample_nexus_webhook["action"] = "DELETED"
|
||||||
resp = await client.post(
|
resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
|
||||||
"/webhooks/nexus",
|
|
||||||
json=sample_nexus_webhook,
|
|
||||||
)
|
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert resp.json()["status"] == "ignored"
|
assert resp.json()["status"] == "ignored"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_webhook_accepts_created(client, sample_nexus_webhook):
|
async def test_webhook_accepts_asset_created(client, sample_nexus_webhook):
|
||||||
with patch("guarddog_nexus.webhooks._scan_in_background") as _mock_scan:
|
with patch("guarddog_nexus.webhooks._scan_in_background") as _mock:
|
||||||
resp = await client.post(
|
resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
|
||||||
"/webhooks/nexus",
|
|
||||||
json=sample_nexus_webhook,
|
|
||||||
)
|
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
assert data["status"] == "accepted"
|
assert data["status"] == "accepted"
|
||||||
assert data["package"] == "requests-2.31.0.tar.gz"
|
|
||||||
assert data["action"] == "CREATED"
|
assert data["action"] == "CREATED"
|
||||||
|
assert "/packages/requests/2.31.0/requests-2.31.0.tar.gz" in data["asset"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_webhook_accepts_updated(client, sample_nexus_webhook):
|
async def test_webhook_accepts_asset_updated(client, sample_nexus_webhook):
|
||||||
sample_nexus_webhook["action"] = "UPDATED"
|
sample_nexus_webhook["action"] = "UPDATED"
|
||||||
with patch("guarddog_nexus.webhooks._scan_in_background") as _mock_scan:
|
with patch("guarddog_nexus.webhooks._scan_in_background") as _mock:
|
||||||
resp = await client.post(
|
resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
|
||||||
"/webhooks/nexus",
|
|
||||||
json=sample_nexus_webhook,
|
|
||||||
)
|
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert resp.json()["status"] == "accepted"
|
assert resp.json()["status"] == "accepted"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_webhook_skips_metadata_assets(client, sample_nexus_webhook):
|
async def test_webhook_skips_metadata_assets(client, sample_nexus_webhook):
|
||||||
sample_nexus_webhook["asset"]["name"] = "index.html"
|
sample_nexus_webhook["asset"]["name"] = "/simple/requests/"
|
||||||
|
resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json()["status"] == "ignored"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_webhook_skips_non_package_extension(client, sample_nexus_webhook):
|
||||||
|
sample_nexus_webhook["asset"]["name"] = "/some/path.json"
|
||||||
|
resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json()["status"] == "ignored"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_webhook_no_asset_or_component(client):
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
"/webhooks/nexus",
|
"/webhooks/nexus",
|
||||||
json=sample_nexus_webhook,
|
json={"action": "CREATED", "repositoryName": "test"},
|
||||||
)
|
)
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert resp.json()["status"] == "ignored"
|
assert resp.json()["status"] == "ignored"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_webhook_missing_asset(client):
|
async def test_webhook_accepts_component(client, sample_nexus_component_webhook):
|
||||||
resp = await client.post(
|
with patch("guarddog_nexus.webhooks._scan_component") as _mock:
|
||||||
"/webhooks/nexus",
|
resp = await client.post("/webhooks/nexus", json=sample_nexus_component_webhook)
|
||||||
json={"action": "CREATED", "repositoryName": "test"},
|
assert resp.status_code == 200
|
||||||
)
|
data = resp.json()
|
||||||
assert resp.status_code == 400
|
assert data["status"] == "accepted"
|
||||||
|
assert data["component"] == "requests==2.31.0"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_webhook_component_no_version(client, sample_nexus_component_webhook):
|
||||||
|
sample_nexus_component_webhook["component"]["version"] = ""
|
||||||
|
resp = await client.post("/webhooks/nexus", json=sample_nexus_component_webhook)
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json()["status"] == "ignored"
|
||||||
|
|||||||
Reference in New Issue
Block a user