fix: защита от дубликатов сканов — UPDATED-only + per-URL мьютекс
- constants.py: RELEVANT_WEBHOOK_ACTIONS теперь только UPDATED (CREATED игнорируется, Nexus proxy шлёт UPDATED при обновлении кэша) - harvester.py: asyncio.Lock на каждый download_url — при параллельных вебхуках только первый пройдёт, остальные skipped — lock проверяется + DB re-check внутри критической секции - tests: обновлены фикстуры (CREATED→UPDATED), добавлен тест ignores_created
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
"""Harvester: download a package from Nexus, scan it, store results."""
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
import os
|
||||
import shutil
|
||||
@@ -20,6 +21,10 @@ from guarddog_nexus.models import Finding, Scan, ScanStatus
|
||||
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info
|
||||
from guarddog_nexus.scanner import scan_package
|
||||
|
||||
# Per-URL locks to avoid parallel scans of the same asset
|
||||
# Registry mapping each asset download URL to the asyncio.Lock that harvest()
# holds while it processes that URL.
_url_locks: dict[str, asyncio.Lock] = {}
|
||||
# Acquired by harvest() around the lookup/creation of entries in _url_locks.
_url_lock = asyncio.Lock()
|
||||
|
||||
|
||||
async def harvest(
|
||||
download_url: str,
|
||||
@@ -42,16 +47,28 @@ async def harvest(
|
||||
|
||||
package_name, package_version = info
|
||||
|
||||
active = await session.scalar(
|
||||
select(Scan.id).where(
|
||||
Scan.nexus_asset_url == download_url,
|
||||
Scan.status.in_([ScanStatus.PENDING.value, ScanStatus.SCANNING.value]),
|
||||
)
|
||||
)
|
||||
if active:
|
||||
log.info("Already scanning this URL, skipping")
|
||||
# Acquire per-URL lock to prevent parallel scans of the same asset
|
||||
async with _url_lock:
|
||||
if download_url not in _url_locks:
|
||||
_url_locks[download_url] = asyncio.Lock()
|
||||
|
||||
lock = _url_locks[download_url]
|
||||
if lock.locked():
|
||||
log.info("URL already being processed, skipping: %s", download_url)
|
||||
return None
|
||||
|
||||
async with lock:
|
||||
# Re-check DB in case another task already created a scan that is still pending or in progress
|
||||
active = await session.scalar(
|
||||
select(Scan.id).where(
|
||||
Scan.nexus_asset_url == download_url,
|
||||
Scan.status.in_([ScanStatus.PENDING.value, ScanStatus.SCANNING.value]),
|
||||
)
|
||||
)
|
||||
if active:
|
||||
log.info("Already scanning this URL, skipping")
|
||||
return None
|
||||
|
||||
scan = Scan(
|
||||
package_name=package_name,
|
||||
package_version=package_version,
|
||||
|
||||
Reference in New Issue
Block a user