fix: защита от дубликатов сканов — UPDATED-only + per-URL мьютекс
- constants.py: RELEVANT_WEBHOOK_ACTIONS теперь только UPDATED (CREATED игнорируется, Nexus proxy шлёт UPDATED при обновлении кэша) - harvester.py: asyncio.Lock на каждый download_url — при параллельных вебхуках только первый пройдёт, остальные skipped — lock проверяется + DB re-check внутри критической секции - tests: обновлены фикстуры (CREATED→UPDATED), добавлен тест ignores_created
This commit is contained in:
@@ -128,7 +128,7 @@ JSON_PATH_SEVERITY = "$.severity"
|
|||||||
# Webhook
|
# Webhook
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
RELEVANT_WEBHOOK_ACTIONS = {"CREATED", "UPDATED"}
|
RELEVANT_WEBHOOK_ACTIONS = {"UPDATED"}
|
||||||
|
|
||||||
WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset"
|
WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset"
|
||||||
WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version"
|
WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version"
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""Harvester: download a package from Nexus, scan it, store results."""
|
"""Harvester: download a package from Nexus, scan it, store results."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import datetime
|
import datetime
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
@@ -20,6 +21,10 @@ from guarddog_nexus.models import Finding, Scan, ScanStatus
|
|||||||
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info
|
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info
|
||||||
from guarddog_nexus.scanner import scan_package
|
from guarddog_nexus.scanner import scan_package
|
||||||
|
|
||||||
|
# Per-URL locks to avoid parallel scans of the same asset
|
||||||
|
_url_locks: dict[str, asyncio.Lock] = {}
|
||||||
|
_url_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
|
||||||
async def harvest(
|
async def harvest(
|
||||||
download_url: str,
|
download_url: str,
|
||||||
@@ -42,6 +47,18 @@ async def harvest(
|
|||||||
|
|
||||||
package_name, package_version = info
|
package_name, package_version = info
|
||||||
|
|
||||||
|
# Acquire per-URL lock to prevent parallel scans of the same asset
|
||||||
|
async with _url_lock:
|
||||||
|
if download_url not in _url_locks:
|
||||||
|
_url_locks[download_url] = asyncio.Lock()
|
||||||
|
|
||||||
|
lock = _url_locks[download_url]
|
||||||
|
if lock.locked():
|
||||||
|
log.info("URL already being processed, skipping: %s", download_url)
|
||||||
|
return None
|
||||||
|
|
||||||
|
async with lock:
|
||||||
|
# Re-check DB in case another task already created and finished a scan
|
||||||
active = await session.scalar(
|
active = await session.scalar(
|
||||||
select(Scan.id).where(
|
select(Scan.id).where(
|
||||||
Scan.nexus_asset_url == download_url,
|
Scan.nexus_asset_url == download_url,
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ def sample_nexus_webhook():
|
|||||||
"timestamp": "2026-05-09T12:00:00.000+00:00",
|
"timestamp": "2026-05-09T12:00:00.000+00:00",
|
||||||
"nodeId": "test-node",
|
"nodeId": "test-node",
|
||||||
"initiator": "admin",
|
"initiator": "admin",
|
||||||
"action": "CREATED",
|
"action": "UPDATED",
|
||||||
"repositoryName": "pypi-proxy",
|
"repositoryName": "pypi-proxy",
|
||||||
"asset": {
|
"asset": {
|
||||||
"id": "abc123",
|
"id": "abc123",
|
||||||
@@ -117,7 +117,7 @@ def sample_nexus_component_webhook():
|
|||||||
"timestamp": "2026-05-09T12:00:00.000+00:00",
|
"timestamp": "2026-05-09T12:00:00.000+00:00",
|
||||||
"nodeId": "test-node",
|
"nodeId": "test-node",
|
||||||
"initiator": "admin",
|
"initiator": "admin",
|
||||||
"action": "CREATED",
|
"action": "UPDATED",
|
||||||
"repositoryName": "pypi-proxy",
|
"repositoryName": "pypi-proxy",
|
||||||
"component": {
|
"component": {
|
||||||
"id": "comp1",
|
"id": "comp1",
|
||||||
|
|||||||
@@ -24,14 +24,11 @@ async def test_webhook_ignores_deleted_action(client, sample_nexus_webhook):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_webhook_accepts_asset_created(client, sample_nexus_webhook):
|
async def test_webhook_ignores_created_action(client, sample_nexus_webhook):
|
||||||
with patch("guarddog_nexus.webhooks._scan_in_background") as _mock:
|
sample_nexus_webhook["action"] = "CREATED"
|
||||||
resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
|
resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook)
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
data = resp.json()
|
assert resp.json()["status"] == "ignored"
|
||||||
assert data["status"] == "accepted"
|
|
||||||
assert data["action"] == "CREATED"
|
|
||||||
assert "/packages/requests/2.31.0/requests-2.31.0.tar.gz" in data["asset"]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|||||||
Reference in New Issue
Block a user