From c4dcd79ecd86c94745cdfbb492f87150d559c6d4 Mon Sep 17 00:00:00 2001 From: Marker689 Date: Sun, 10 May 2026 05:47:35 +0300 Subject: [PATCH] =?UTF-8?q?fix:=20=D0=B7=D0=B0=D1=89=D0=B8=D1=82=D0=B0=20?= =?UTF-8?q?=D0=BE=D1=82=20=D0=B4=D1=83=D0=B1=D0=BB=D0=B8=D0=BA=D0=B0=D1=82?= =?UTF-8?q?=D0=BE=D0=B2=20=D1=81=D0=BA=D0=B0=D0=BD=D0=BE=D0=B2=20=E2=80=94?= =?UTF-8?q?=20UPDATED-only=20+=20per-URL=20=D0=BC=D1=8C=D1=8E=D1=82=D0=B5?= =?UTF-8?q?=D0=BA=D1=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - constants.py: RELEVANT_WEBHOOK_ACTIONS теперь только UPDATED (CREATED игнорируется, Nexs proxy шлёт UPDATED при обновл кэша) - harvester.py: asyncio.Lock на каждый download_url — при параллельных вебхуках только первый пройдёт, остальные skipped — lock проверяется + DB re-check внутри критической секции - tests: обновлены фикстуры (CREATED→UPDATED), добавлен тест ignores_created --- guarddog_nexus/constants.py | 2 +- guarddog_nexus/harvester.py | 33 +++++++++++++++++++++++++-------- tests/conftest.py | 4 ++-- tests/test_webhooks.py | 13 +++++-------- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/guarddog_nexus/constants.py b/guarddog_nexus/constants.py index a66e4cf..24250c5 100644 --- a/guarddog_nexus/constants.py +++ b/guarddog_nexus/constants.py @@ -128,7 +128,7 @@ JSON_PATH_SEVERITY = "$.severity" # Webhook # --------------------------------------------------------------------------- -RELEVANT_WEBHOOK_ACTIONS = {"CREATED", "UPDATED"} +RELEVANT_WEBHOOK_ACTIONS = {"UPDATED"} WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset" WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version" diff --git a/guarddog_nexus/harvester.py b/guarddog_nexus/harvester.py index 2d6bf21..90e7df4 100644 --- a/guarddog_nexus/harvester.py +++ b/guarddog_nexus/harvester.py @@ -1,5 +1,6 @@ """Harvester: download a package from Nexus, scan it, store results.""" +import asyncio import datetime import os import shutil @@ -20,6 +21,10 @@ from guarddog_nexus.models import Finding, Scan, ScanStatus from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info from guarddog_nexus.scanner import scan_package +# Per-URL locks to avoid parallel scans of the same asset +_url_locks: dict[str, asyncio.Lock] = {} +_url_lock = asyncio.Lock() + async def harvest( download_url: str, @@ -42,16 +47,28 @@ async def harvest( package_name, package_version = info - active = await session.scalar( - select(Scan.id).where( - Scan.nexus_asset_url == download_url, - Scan.status.in_([ScanStatus.PENDING.value, ScanStatus.SCANNING.value]), - ) - ) - if active: - log.info("Already scanning this URL, skipping") + # Acquire per-URL lock to prevent parallel scans of the same asset + async with _url_lock: + if download_url not in _url_locks: + _url_locks[download_url] = asyncio.Lock() + + lock = _url_locks[download_url] + if lock.locked(): + log.info("URL already being processed, skipping: %s", download_url) return None + async with lock: + # Re-check DB in case another task already created and finished a scan + active = await session.scalar( + select(Scan.id).where( + Scan.nexus_asset_url == download_url, + Scan.status.in_([ScanStatus.PENDING.value, ScanStatus.SCANNING.value]), + ) + ) + if active: + log.info("Already scanning this URL, skipping") + return None + scan = Scan( package_name=package_name, package_version=package_version, diff --git a/tests/conftest.py b/tests/conftest.py index 3fc582d..be65ca0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -99,7 +99,7 @@ def sample_nexus_webhook(): "timestamp": "2026-05-09T12:00:00.000+00:00", "nodeId": "test-node", "initiator": "admin", - "action": "CREATED", + "action": "UPDATED", "repositoryName": "pypi-proxy", "asset": { "id": "abc123", @@ -117,7 +117,7 @@ def sample_nexus_component_webhook(): "timestamp": "2026-05-09T12:00:00.000+00:00", "nodeId": "test-node", "initiator": "admin", - "action": "CREATED", + "action": "UPDATED", "repositoryName": "pypi-proxy", "component": { "id": "comp1", diff --git a/tests/test_webhooks.py b/tests/test_webhooks.py index b0aca44..b81848c 100644 --- a/tests/test_webhooks.py +++ b/tests/test_webhooks.py @@ -24,14 +24,11 @@ async def test_webhook_ignores_deleted_action(client, sample_nexus_webhook): @pytest.mark.asyncio -async def test_webhook_accepts_asset_created(client, sample_nexus_webhook): - with patch("guarddog_nexus.webhooks._scan_in_background") as _mock: - resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook) - assert resp.status_code == 200 - data = resp.json() - assert data["status"] == "accepted" - assert data["action"] == "CREATED" - assert "/packages/requests/2.31.0/requests-2.31.0.tar.gz" in data["asset"] +async def test_webhook_ignores_created_action(client, sample_nexus_webhook): + sample_nexus_webhook["action"] = "CREATED" + resp = await client.post("/webhooks/nexus", json=sample_nexus_webhook) + assert resp.status_code == 200 + assert resp.json()["status"] == "ignored" @pytest.mark.asyncio