From d23abe8b4bb2d3dba3c74183929eb3873acc29c2 Mon Sep 17 00:00:00 2001 From: Marker689 Date: Sun, 10 May 2026 05:52:23 +0300 Subject: [PATCH] =?UTF-8?q?feat:=20=D0=BB=D0=B8=D0=BC=D0=B8=D1=82=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=BD=D0=BA=D1=83=D1=80=D0=B5=D0=BD=D1=82=D0=BD=D1=8B?= =?UTF-8?q?=D1=85=20=D1=81=D0=BA=D0=B0=D0=BD=D0=BE=D0=B2=20=D1=87=D0=B5?= =?UTF-8?q?=D1=80=D0=B5=D0=B7=20asyncio.Semaphore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - config.py: MAX_CONCURRENT_SCANS (default=4) - harvester.py: глобальный _scan_semaphore оборачивает scan_package() — при N одновременных сканах, (N+1)-й будет ждать освобождения слота — download и SHA256 не лимитируются, только guarddog subprocess - docker-compose.yml, .env.example: переменная добавлена --- .env.example | 1 + docker-compose.yml | 1 + guarddog_nexus/config.py | 1 + guarddog_nexus/constants.py | 1 + guarddog_nexus/harvester.py | 6 +++++- 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index fc32461..6f062f9 100644 --- a/.env.example +++ b/.env.example @@ -23,6 +23,7 @@ WEBHOOK_SECRET= SCAN_TIMEOUT_SECONDS=300 TEMP_DIR=/tmp/guarddog-nexus GUARDDOG_BINARY=guarddog +MAX_CONCURRENT_SCANS=4 # Timeouts (seconds) NEXUS_DOWNLOAD_TIMEOUT_SECONDS=120 diff --git a/docker-compose.yml b/docker-compose.yml index 4c65365..213a10b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,7 @@ services: LLM_API_KEY: "${LLM_API_KEY:-}" LLM_MODEL: "${LLM_MODEL:-gpt-4o-mini}" LLM_TIMEOUT_SECONDS: "${LLM_TIMEOUT_SECONDS:-30}" + MAX_CONCURRENT_SCANS: "${MAX_CONCURRENT_SCANS:-4}" volumes: - ./data:/data depends_on: diff --git a/guarddog_nexus/config.py b/guarddog_nexus/config.py index ff91149..cbf1f65 100644 --- a/guarddog_nexus/config.py +++ b/guarddog_nexus/config.py @@ -46,6 +46,7 @@ class Config: scan_timeout_seconds: int = int(os.getenv("SCAN_TIMEOUT_SECONDS", "300")) temp_dir: str = os.getenv("TEMP_DIR", "/tmp/guarddog-nexus") guarddog_binary: str = os.getenv("GUARDDOG_BINARY", GUARDDOG_BINARY_FALLBACK) + max_concurrent_scans: int = int(os.getenv("MAX_CONCURRENT_SCANS", "4")) # LLM analysis llm_enabled: bool = os.getenv("LLM_ENABLED", "").lower() in ("1", "true", "yes") diff --git a/guarddog_nexus/constants.py b/guarddog_nexus/constants.py index 24250c5..a6f8b95 100644 --- a/guarddog_nexus/constants.py +++ b/guarddog_nexus/constants.py @@ -100,6 +100,7 @@ SHA256_HEX_LENGTH = 64 # --------------------------------------------------------------------------- GUARDDOG_BINARY_FALLBACK = "guarddog" +DEFAULT_MAX_CONCURRENT_SCANS = 4 GUARDDOG_OUTPUT_KEY = "--output-format" GUARDDOG_OUTPUT_FORMAT = "json" GUARDDOG_RESULTS_KEY = "results" diff --git a/guarddog_nexus/harvester.py b/guarddog_nexus/harvester.py index 90e7df4..3f43f1b 100644 --- a/guarddog_nexus/harvester.py +++ b/guarddog_nexus/harvester.py @@ -25,6 +25,9 @@ from guarddog_nexus.scanner import scan_package _url_locks: dict[str, asyncio.Lock] = {} _url_lock = asyncio.Lock() +# Global semaphore to limit concurrent GuardDog processes +_scan_semaphore = asyncio.Semaphore(config.max_concurrent_scans) + async def harvest( download_url: str, @@ -118,7 +121,8 @@ async def harvest( return scan log.info("Scanning %s==%s", package_name, package_version) - result = await scan_package(downloaded, ecosystem) + async with _scan_semaphore: + result = await scan_package(downloaded, ecosystem) findings_list = result.get("findings", []) created_findings: list[Finding] = []