feat: лимит конкурентных сканов через asyncio.Semaphore

- config.py: MAX_CONCURRENT_SCANS (default=4)
- harvester.py: глобальный _scan_semaphore оборачивает scan_package()
  — при N одновременных сканах (N = MAX_CONCURRENT_SCANS) следующий, (N+1)-й, будет ждать освобождения слота
  — download и SHA256 не лимитируются, только guarddog subprocess
- docker-compose.yml, .env.example: переменная добавлена
This commit is contained in:
Marker689
2026-05-10 05:52:23 +03:00
parent c4dcd79ecd
commit d23abe8b4b
5 changed files with 9 additions and 1 deletions

View File

@@ -46,6 +46,7 @@ class Config:
scan_timeout_seconds: int = int(os.getenv("SCAN_TIMEOUT_SECONDS", "300"))
temp_dir: str = os.getenv("TEMP_DIR", "/tmp/guarddog-nexus")
guarddog_binary: str = os.getenv("GUARDDOG_BINARY", GUARDDOG_BINARY_FALLBACK)
max_concurrent_scans: int = int(os.getenv("MAX_CONCURRENT_SCANS", "4"))
# LLM analysis
llm_enabled: bool = os.getenv("LLM_ENABLED", "").lower() in ("1", "true", "yes")

View File

@@ -100,6 +100,7 @@ SHA256_HEX_LENGTH = 64
# ---------------------------------------------------------------------------
GUARDDOG_BINARY_FALLBACK = "guarddog"
DEFAULT_MAX_CONCURRENT_SCANS = 4
GUARDDOG_OUTPUT_KEY = "--output-format"
GUARDDOG_OUTPUT_FORMAT = "json"
GUARDDOG_RESULTS_KEY = "results"

View File

@@ -25,6 +25,9 @@ from guarddog_nexus.scanner import scan_package
_url_locks: dict[str, asyncio.Lock] = {}
_url_lock = asyncio.Lock()
# Global semaphore to limit concurrent GuardDog processes
_scan_semaphore = asyncio.Semaphore(config.max_concurrent_scans)
async def harvest(
download_url: str,
@@ -118,7 +121,8 @@ async def harvest(
return scan
log.info("Scanning %s==%s", package_name, package_version)
result = await scan_package(downloaded, ecosystem)
async with _scan_semaphore:
result = await scan_package(downloaded, ecosystem)
findings_list = result.get("findings", [])
created_findings: list[Finding] = []