fix: защита от дубликатов сканов — UPDATED-only + per-URL мьютекс

- constants.py: RELEVANT_WEBHOOK_ACTIONS теперь только UPDATED
  (CREATED игнорируется, Nexus proxy шлёт UPDATED при обновлении кэша)
- harvester.py: asyncio.Lock на каждый download_url
  — при параллельных вебхуках только первый пройдёт, остальные skipped
  — lock.locked() проверяется под общим мьютексом реестра, а DB re-check выполняется внутри per-URL критической секции
- tests: обновлены фикстуры (CREATED→UPDATED), добавлен тест ignores_created
This commit is contained in:
Marker689
2026-05-10 05:47:35 +03:00
parent 1343c3f415
commit c4dcd79ecd
4 changed files with 33 additions and 19 deletions

View File

@@ -128,7 +128,7 @@ JSON_PATH_SEVERITY = "$.severity"
# Webhook
# ---------------------------------------------------------------------------
RELEVANT_WEBHOOK_ACTIONS = {"CREATED", "UPDATED"}
RELEVANT_WEBHOOK_ACTIONS = {"UPDATED"}
WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset"
WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version"

View File

@@ -1,5 +1,6 @@
"""Harvester: download a package from Nexus, scan it, store results."""
import asyncio
import datetime
import os
import shutil
@@ -20,6 +21,10 @@ from guarddog_nexus.models import Finding, Scan, ScanStatus
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info
from guarddog_nexus.scanner import scan_package
# Per-URL locks to avoid parallel scans of the same asset.
#
# _url_locks maps a download_url to its own asyncio.Lock; harvest() creates
# the entry lazily the first time a URL is seen.
# _url_lock (singular) is the registry mutex: harvest() holds it while it
# looks up / inserts into _url_locks and checks whether the per-URL lock is
# already held.
# NOTE(review): no removal of entries from _url_locks is visible in this
# chunk — confirm the dict cannot grow without bound in a long-lived process.
_url_locks: dict[str, asyncio.Lock] = {}
_url_lock = asyncio.Lock()
async def harvest(
download_url: str,
@@ -42,6 +47,18 @@ async def harvest(
package_name, package_version = info
# Acquire per-URL lock to prevent parallel scans of the same asset
async with _url_lock:
if download_url not in _url_locks:
_url_locks[download_url] = asyncio.Lock()
lock = _url_locks[download_url]
if lock.locked():
log.info("URL already being processed, skipping: %s", download_url)
return None
async with lock:
# Re-check DB in case another task already created and finished a scan
active = await session.scalar(
select(Scan.id).where(
Scan.nexus_asset_url == download_url,

View File

@@ -99,7 +99,7 @@ def sample_nexus_webhook():
"timestamp": "2026-05-09T12:00:00.000+00:00",
"nodeId": "test-node",
"initiator": "admin",
"action": "CREATED",
"action": "UPDATED",
"repositoryName": "pypi-proxy",
"asset": {
"id": "abc123",
@@ -117,7 +117,7 @@ def sample_nexus_component_webhook():
"timestamp": "2026-05-09T12:00:00.000+00:00",
"nodeId": "test-node",
"initiator": "admin",
"action": "CREATED",
"action": "UPDATED",
"repositoryName": "pypi-proxy",
"component": {
"id": "comp1",

View File

@@ -24,14 +24,11 @@ async def test_webhook_ignores_deleted_action(client, sample_nexus_webhook):
@pytest.mark.asyncio
async def test_webhook_accepts_asset_created(client, sample_nexus_webhook):
with patch("guarddog_nexus.webhooks._scan_in_background") as _mock:
async def test_webhook_ignores_created_action(client, sample_nexus_webhook):
sample_nexus_webhook["action"] = "CREATED"
resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
assert resp.status_code == 200
data = resp.json()
assert data["status"] == "accepted"
assert data["action"] == "CREATED"
assert "/packages/requests/2.31.0/requests-2.31.0.tar.gz" in data["asset"]
assert resp.json()["status"] == "ignored"
@pytest.mark.asyncio