feat: лимит конкурентных сканов через asyncio.Semaphore
- config.py: MAX_CONCURRENT_SCANS (default=4)
- harvester.py: глобальный _scan_semaphore оборачивает scan_package()
  — при N одновременных сканах (N+1)-й будет ждать освобождения слота
  — download и SHA256 не лимитируются, только guarddog subprocess
- docker-compose.yml, .env.example: переменная добавлена
This commit is contained in:
@@ -23,6 +23,7 @@ WEBHOOK_SECRET=
|
|||||||
SCAN_TIMEOUT_SECONDS=300
|
SCAN_TIMEOUT_SECONDS=300
|
||||||
TEMP_DIR=/tmp/guarddog-nexus
|
TEMP_DIR=/tmp/guarddog-nexus
|
||||||
GUARDDOG_BINARY=guarddog
|
GUARDDOG_BINARY=guarddog
|
||||||
|
MAX_CONCURRENT_SCANS=4
|
||||||
|
|
||||||
# Timeouts (seconds)
|
# Timeouts (seconds)
|
||||||
NEXUS_DOWNLOAD_TIMEOUT_SECONDS=120
|
NEXUS_DOWNLOAD_TIMEOUT_SECONDS=120
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ services:
|
|||||||
LLM_API_KEY: "${LLM_API_KEY:-}"
|
LLM_API_KEY: "${LLM_API_KEY:-}"
|
||||||
LLM_MODEL: "${LLM_MODEL:-gpt-4o-mini}"
|
LLM_MODEL: "${LLM_MODEL:-gpt-4o-mini}"
|
||||||
LLM_TIMEOUT_SECONDS: "${LLM_TIMEOUT_SECONDS:-30}"
|
LLM_TIMEOUT_SECONDS: "${LLM_TIMEOUT_SECONDS:-30}"
|
||||||
|
MAX_CONCURRENT_SCANS: "${MAX_CONCURRENT_SCANS:-4}"
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/data
|
- ./data:/data
|
||||||
depends_on:
|
depends_on:
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ class Config:
|
|||||||
scan_timeout_seconds: int = int(os.getenv("SCAN_TIMEOUT_SECONDS", "300"))
|
scan_timeout_seconds: int = int(os.getenv("SCAN_TIMEOUT_SECONDS", "300"))
|
||||||
temp_dir: str = os.getenv("TEMP_DIR", "/tmp/guarddog-nexus")
|
temp_dir: str = os.getenv("TEMP_DIR", "/tmp/guarddog-nexus")
|
||||||
guarddog_binary: str = os.getenv("GUARDDOG_BINARY", GUARDDOG_BINARY_FALLBACK)
|
guarddog_binary: str = os.getenv("GUARDDOG_BINARY", GUARDDOG_BINARY_FALLBACK)
|
||||||
|
max_concurrent_scans: int = int(os.getenv("MAX_CONCURRENT_SCANS", "4"))
|
||||||
|
|
||||||
# LLM analysis
|
# LLM analysis
|
||||||
llm_enabled: bool = os.getenv("LLM_ENABLED", "").lower() in ("1", "true", "yes")
|
llm_enabled: bool = os.getenv("LLM_ENABLED", "").lower() in ("1", "true", "yes")
|
||||||
|
|||||||
@@ -100,6 +100,7 @@ SHA256_HEX_LENGTH = 64
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
GUARDDOG_BINARY_FALLBACK = "guarddog"
|
GUARDDOG_BINARY_FALLBACK = "guarddog"
|
||||||
|
DEFAULT_MAX_CONCURRENT_SCANS = 4
|
||||||
GUARDDOG_OUTPUT_KEY = "--output-format"
|
GUARDDOG_OUTPUT_KEY = "--output-format"
|
||||||
GUARDDOG_OUTPUT_FORMAT = "json"
|
GUARDDOG_OUTPUT_FORMAT = "json"
|
||||||
GUARDDOG_RESULTS_KEY = "results"
|
GUARDDOG_RESULTS_KEY = "results"
|
||||||
|
|||||||
@@ -25,6 +25,9 @@ from guarddog_nexus.scanner import scan_package
|
|||||||
_url_locks: dict[str, asyncio.Lock] = {}
|
_url_locks: dict[str, asyncio.Lock] = {}
|
||||||
_url_lock = asyncio.Lock()
|
_url_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
# Global semaphore to limit concurrent GuardDog processes
|
||||||
|
_scan_semaphore = asyncio.Semaphore(config.max_concurrent_scans)
|
||||||
|
|
||||||
|
|
||||||
async def harvest(
|
async def harvest(
|
||||||
download_url: str,
|
download_url: str,
|
||||||
@@ -118,7 +121,8 @@ async def harvest(
|
|||||||
return scan
|
return scan
|
||||||
|
|
||||||
log.info("Scanning %s==%s", package_name, package_version)
|
log.info("Scanning %s==%s", package_name, package_version)
|
||||||
result = await scan_package(downloaded, ecosystem)
|
async with _scan_semaphore:
|
||||||
|
result = await scan_package(downloaded, ecosystem)
|
||||||
|
|
||||||
findings_list = result.get("findings", [])
|
findings_list = result.get("findings", [])
|
||||||
created_findings: list[Finding] = []
|
created_findings: list[Finding] = []
|
||||||
|
|||||||
Reference in New Issue
Block a user