refactor: uv-based deps, no nexus auth, LLM retries, lock cleanup, health checks, e2e tests

This commit is contained in:
Marker689
2026-05-11 19:27:56 +03:00
parent 698f02c8af
commit 04abe44ab4
20 changed files with 1583 additions and 51 deletions

View File

@@ -28,6 +28,18 @@ _url_lock = asyncio.Lock()
# Global semaphore to limit concurrent GuardDog processes
_scan_semaphore = asyncio.Semaphore(config.max_concurrent_scans)
# Cleanup interval for unused locks (30 minutes)
_LOCK_CLEANUP_INTERVAL = 1800
async def _cleanup_url_locks():
"""Periodically clean up unused URL locks to prevent memory leaks."""
while True:
await asyncio.sleep(_LOCK_CLEANUP_INTERVAL)
for key in list(_url_locks.keys()):
if not _url_locks[key].locked():
_url_locks.pop(key, None)
async def harvest(
download_url: str,
@@ -94,6 +106,7 @@ async def harvest(
await session.commit()
await session.refresh(scan)
tmpdir = None
try:
await asyncio.to_thread(os.makedirs, config.temp_dir, exist_ok=True)
tmpdir = await asyncio.to_thread(tempfile.mkdtemp, dir=config.temp_dir)
@@ -201,7 +214,8 @@ async def harvest(
return scan
finally:
await asyncio.to_thread(shutil.rmtree, tmpdir, ignore_errors=True)
if tmpdir:
await asyncio.to_thread(shutil.rmtree, tmpdir, ignore_errors=True)
async def _run_llm_analysis(findings: list[Finding], session: AsyncSession) -> list[dict]:

View File

@@ -36,15 +36,8 @@ def _build_user_message(finding: dict) -> str:
return prompt
async def analyze_finding(finding_data: dict) -> dict | None:
"""Send a finding to the LLM for security analysis.
Returns parsed JSON dict on success, or None on failure.
"""
if not config.llm_api_key:
log.warning("LLM_API_KEY not set — skipping LLM analysis")
return None
async def _attempt_llm_call(finding_data: dict) -> dict | None:
"""Single attempt to call LLM and parse response."""
url = f"{config.llm_api_base.rstrip('/')}/chat/completions"
headers = {
"Authorization": f"Bearer {config.llm_api_key}",
@@ -78,12 +71,21 @@ async def analyze_finding(finding_data: dict) -> dict | None:
return None
try:
content = body["choices"][0]["message"]["content"]
choices = body.get("choices", [])
if not choices:
raise ValueError("Empty choices list")
message = choices[0].get("message", {})
content = message.get("content", "")
if not content:
raise ValueError("Empty message content")
return json.loads(content)
except (KeyError, IndexError, json.JSONDecodeError) as e:
except (ValueError, json.JSONDecodeError) as e:
raw = ""
try:
raw = body["choices"][0]["message"]["content"]
choices = body.get("choices", [])
if choices:
message = choices[0].get("message", {})
raw = message.get("content", "")
except (KeyError, IndexError):
raw = str(body)[:300]
# Some models wrap JSON in markdown code blocks
@@ -102,3 +104,32 @@ async def analyze_finding(finding_data: dict) -> dict | None:
raw[:200] if isinstance(raw, str) else str(raw)[:200],
)
return None
async def analyze_finding(finding_data: dict, max_retries: int = 3) -> dict | None:
"""Send a finding to the LLM for security analysis with retry logic.
Returns parsed JSON dict on success, or None on failure.
"""
if not config.llm_api_key:
log.warning("LLM_API_KEY not set — skipping LLM analysis")
return None
for attempt in range(max_retries):
result = await _attempt_llm_call(finding_data)
if result is not None:
return result
if attempt < max_retries - 1:
await asyncio.sleep(2**attempt * 2) # 2s, 4s, 8s
log.info(
"Retrying LLM analysis for rule=%s (attempt %d)",
finding_data.get("rule"),
attempt + 2,
)
log.error(
"LLM analysis failed after %d attempts for rule=%s",
max_retries,
finding_data.get("rule"),
)
return None

View File

@@ -103,9 +103,8 @@ async def download_asset(download_url: str, dest_dir: str) -> str | None:
"""Download an asset from Nexus using async httpx."""
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
async with httpx.AsyncClient(
auth=auth, timeout=config.nexus_download_timeout, follow_redirects=True
timeout=config.nexus_download_timeout, follow_redirects=True
) as client:
try:
response = await client.get(download_url)
@@ -124,9 +123,8 @@ def _write_file(path: str, content: bytes) -> None:
async def nexus_get(path: str) -> httpx.Response:
"""Make an authenticated GET request to Nexus REST API."""
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
async with httpx.AsyncClient(auth=auth, timeout=config.nexus_api_timeout) as client:
"""Make a GET request to Nexus REST API (anonymous access)."""
async with httpx.AsyncClient(timeout=config.nexus_api_timeout) as client:
return await client.get(f"{config.nexus_url.rstrip('/')}{path}")