refactor: uv-based deps, no nexus auth, LLM retries, lock cleanup, health checks, e2e tests
This commit is contained in:
@@ -28,6 +28,18 @@ _url_lock = asyncio.Lock()
|
||||
# Global semaphore to limit concurrent GuardDog processes
|
||||
_scan_semaphore = asyncio.Semaphore(config.max_concurrent_scans)
|
||||
|
||||
# Cleanup interval for unused locks (30 minutes)
|
||||
_LOCK_CLEANUP_INTERVAL = 1800
|
||||
|
||||
|
||||
async def _cleanup_url_locks():
|
||||
"""Periodically clean up unused URL locks to prevent memory leaks."""
|
||||
while True:
|
||||
await asyncio.sleep(_LOCK_CLEANUP_INTERVAL)
|
||||
for key in list(_url_locks.keys()):
|
||||
if not _url_locks[key].locked():
|
||||
_url_locks.pop(key, None)
|
||||
|
||||
|
||||
async def harvest(
|
||||
download_url: str,
|
||||
@@ -94,6 +106,7 @@ async def harvest(
|
||||
await session.commit()
|
||||
await session.refresh(scan)
|
||||
|
||||
tmpdir = None
|
||||
try:
|
||||
await asyncio.to_thread(os.makedirs, config.temp_dir, exist_ok=True)
|
||||
tmpdir = await asyncio.to_thread(tempfile.mkdtemp, dir=config.temp_dir)
|
||||
@@ -201,7 +214,8 @@ async def harvest(
|
||||
return scan
|
||||
|
||||
finally:
|
||||
await asyncio.to_thread(shutil.rmtree, tmpdir, ignore_errors=True)
|
||||
if tmpdir:
|
||||
await asyncio.to_thread(shutil.rmtree, tmpdir, ignore_errors=True)
|
||||
|
||||
|
||||
async def _run_llm_analysis(findings: list[Finding], session: AsyncSession) -> list[dict]:
|
||||
|
||||
@@ -36,15 +36,8 @@ def _build_user_message(finding: dict) -> str:
|
||||
return prompt
|
||||
|
||||
|
||||
async def analyze_finding(finding_data: dict) -> dict | None:
|
||||
"""Send a finding to the LLM for security analysis.
|
||||
|
||||
Returns parsed JSON dict on success, or None on failure.
|
||||
"""
|
||||
if not config.llm_api_key:
|
||||
log.warning("LLM_API_KEY not set — skipping LLM analysis")
|
||||
return None
|
||||
|
||||
async def _attempt_llm_call(finding_data: dict) -> dict | None:
|
||||
"""Single attempt to call LLM and parse response."""
|
||||
url = f"{config.llm_api_base.rstrip('/')}/chat/completions"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {config.llm_api_key}",
|
||||
@@ -78,12 +71,21 @@ async def analyze_finding(finding_data: dict) -> dict | None:
|
||||
return None
|
||||
|
||||
try:
|
||||
content = body["choices"][0]["message"]["content"]
|
||||
choices = body.get("choices", [])
|
||||
if not choices:
|
||||
raise ValueError("Empty choices list")
|
||||
message = choices[0].get("message", {})
|
||||
content = message.get("content", "")
|
||||
if not content:
|
||||
raise ValueError("Empty message content")
|
||||
return json.loads(content)
|
||||
except (KeyError, IndexError, json.JSONDecodeError) as e:
|
||||
except (ValueError, json.JSONDecodeError) as e:
|
||||
raw = ""
|
||||
try:
|
||||
raw = body["choices"][0]["message"]["content"]
|
||||
choices = body.get("choices", [])
|
||||
if choices:
|
||||
message = choices[0].get("message", {})
|
||||
raw = message.get("content", "")
|
||||
except (KeyError, IndexError):
|
||||
raw = str(body)[:300]
|
||||
# Some models wrap JSON in markdown code blocks
|
||||
@@ -102,3 +104,32 @@ async def analyze_finding(finding_data: dict) -> dict | None:
|
||||
raw[:200] if isinstance(raw, str) else str(raw)[:200],
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
async def analyze_finding(finding_data: dict, max_retries: int = 3) -> dict | None:
|
||||
"""Send a finding to the LLM for security analysis with retry logic.
|
||||
|
||||
Returns parsed JSON dict on success, or None on failure.
|
||||
"""
|
||||
if not config.llm_api_key:
|
||||
log.warning("LLM_API_KEY not set — skipping LLM analysis")
|
||||
return None
|
||||
|
||||
for attempt in range(max_retries):
|
||||
result = await _attempt_llm_call(finding_data)
|
||||
if result is not None:
|
||||
return result
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(2**attempt * 2) # 2s, 4s, 8s
|
||||
log.info(
|
||||
"Retrying LLM analysis for rule=%s (attempt %d)",
|
||||
finding_data.get("rule"),
|
||||
attempt + 2,
|
||||
)
|
||||
|
||||
log.error(
|
||||
"LLM analysis failed after %d attempts for rule=%s",
|
||||
max_retries,
|
||||
finding_data.get("rule"),
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -103,9 +103,8 @@ async def download_asset(download_url: str, dest_dir: str) -> str | None:
|
||||
"""Download an asset from Nexus using async httpx."""
|
||||
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
|
||||
|
||||
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
|
||||
async with httpx.AsyncClient(
|
||||
auth=auth, timeout=config.nexus_download_timeout, follow_redirects=True
|
||||
timeout=config.nexus_download_timeout, follow_redirects=True
|
||||
) as client:
|
||||
try:
|
||||
response = await client.get(download_url)
|
||||
@@ -124,9 +123,8 @@ def _write_file(path: str, content: bytes) -> None:
|
||||
|
||||
|
||||
async def nexus_get(path: str) -> httpx.Response:
|
||||
"""Make an authenticated GET request to Nexus REST API."""
|
||||
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
|
||||
async with httpx.AsyncClient(auth=auth, timeout=config.nexus_api_timeout) as client:
|
||||
"""Make a GET request to Nexus REST API (anonymous access)."""
|
||||
async with httpx.AsyncClient(timeout=config.nexus_api_timeout) as client:
|
||||
return await client.get(f"{config.nexus_url.rstrip('/')}{path}")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user