feat: лимит конкурентных сканов через asyncio.Semaphore

- config.py: MAX_CONCURRENT_SCANS (default=4)
- harvester.py: глобальный _scan_semaphore оборачивает scan_package()
  — при N одновременных сканах, (N+1)-й будет ждать освобождения слота
  — download и SHA256 не лимитируются, только guarddog subprocess
- docker-compose.yml, .env.example: переменная добавлена
This commit is contained in:
Marker689
2026-05-10 05:52:23 +03:00
parent c4dcd79ecd
commit d23abe8b4b
5 changed files with 9 additions and 1 deletions

View File

@@ -23,6 +23,7 @@ WEBHOOK_SECRET=
SCAN_TIMEOUT_SECONDS=300
TEMP_DIR=/tmp/guarddog-nexus
GUARDDOG_BINARY=guarddog
MAX_CONCURRENT_SCANS=4
# Timeouts (seconds)
NEXUS_DOWNLOAD_TIMEOUT_SECONDS=120

View File

@@ -16,6 +16,7 @@ services:
LLM_API_KEY: "${LLM_API_KEY:-}"
LLM_MODEL: "${LLM_MODEL:-gpt-4o-mini}"
LLM_TIMEOUT_SECONDS: "${LLM_TIMEOUT_SECONDS:-30}"
MAX_CONCURRENT_SCANS: "${MAX_CONCURRENT_SCANS:-4}"
volumes:
- ./data:/data
depends_on:

View File

@@ -46,6 +46,7 @@ class Config:
scan_timeout_seconds: int = int(os.getenv("SCAN_TIMEOUT_SECONDS", "300"))
temp_dir: str = os.getenv("TEMP_DIR", "/tmp/guarddog-nexus")
guarddog_binary: str = os.getenv("GUARDDOG_BINARY", GUARDDOG_BINARY_FALLBACK)
max_concurrent_scans: int = int(os.getenv("MAX_CONCURRENT_SCANS", "4"))
# LLM analysis
llm_enabled: bool = os.getenv("LLM_ENABLED", "").lower() in ("1", "true", "yes")

View File

@@ -100,6 +100,7 @@ SHA256_HEX_LENGTH = 64
# ---------------------------------------------------------------------------
GUARDDOG_BINARY_FALLBACK = "guarddog"
DEFAULT_MAX_CONCURRENT_SCANS = 4
GUARDDOG_OUTPUT_KEY = "--output-format"
GUARDDOG_OUTPUT_FORMAT = "json"
GUARDDOG_RESULTS_KEY = "results"

View File

@@ -25,6 +25,9 @@ from guarddog_nexus.scanner import scan_package
_url_locks: dict[str, asyncio.Lock] = {}
_url_lock = asyncio.Lock()
# Global semaphore to limit concurrent GuardDog processes
_scan_semaphore = asyncio.Semaphore(config.max_concurrent_scans)
async def harvest(
download_url: str,
@@ -118,6 +121,7 @@ async def harvest(
return scan
log.info("Scanning %s==%s", package_name, package_version)
async with _scan_semaphore:
result = await scan_package(downloaded, ecosystem)
findings_list = result.get("findings", [])