- S4: bump jinja2>=3.1.4, python-multipart>=0.0.18, httpx>=0.28.0
- S5: _detect_ecosystem — DEFAULT_ECOSYSTEM для неизвестных форматов
- S6: harvester — log.exception() вместо log.error()
- S8: _scan_component — urlencode параметров
- P1: scanner — proc.kill() при таймауте
- P3: api_packages — selectinload(Scan.findings), убран N+1
- P4+P5: утечка _url_locks и _llm_locks при early return
- P6: DB reaper — сброс {'status':'analyzing'} при старте
- UI: htmx-пагинация, фильтры не теряют flagged, 404 с layout
- UI: мобильные таблицы overflow-x, полная стата на дашборде
- UI: i18n статусов в _status_badge, urlencode package_name
- 16 новых тестов: analyze endpoint (6), scanner errors (4),
webhook signature (2), llm client (4)
105 lines
3.4 KiB
Python
105 lines
3.4 KiB
Python
"""LLM analysis client for GuardDog findings.
|
|
|
|
Supports any OpenAI-compatible API endpoint with configurable model.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
|
|
import httpx
|
|
|
|
from ..config import config
|
|
from ..constants import LLM_ANALYSIS_SYSTEM_PROMPT, LLM_DEFAULT_TEMPERATURE, LLM_RESPONSE_FORMAT
|
|
from ..logging_setup import log
|
|
|
|
# Module-wide semaphore sized from config.llm_max_concurrent; analyze_finding
# holds it for the duration of each HTTP request to the LLM endpoint.
_llm_semaphore = asyncio.Semaphore(config.llm_max_concurrent)
|
|
|
|
|
|
def _build_user_message(finding: dict) -> str:
|
|
"""Build a concise prompt from a finding's data."""
|
|
rule = finding.get("rule", "unknown")
|
|
severity = finding.get("severity", "unknown")
|
|
message = finding.get("message", "")
|
|
location = finding.get("location", "")
|
|
code = finding.get("code", "")
|
|
|
|
prompt = f"Rule: {rule}\nSeverity: {severity}\nMessage: {message}\n"
|
|
if location:
|
|
prompt += f"Location: {location}\n"
|
|
if code:
|
|
prompt += f"Code snippet:\n```\n{code}\n```\n"
|
|
|
|
prompt += (
|
|
"\nAnalyse this finding and return JSON with keys: "
|
|
"verdict, summary, analysis, severity_rating."
|
|
)
|
|
return prompt
|
|
|
|
|
|
def _strip_code_fence(text: str) -> str:
    """Return *text* without its surrounding Markdown code fence.

    Backticks and whitespace are trimmed from both ends. If the first
    remaining line is just the language tag ``json`` (any letter case,
    with or without a trailing carriage return), that line is dropped too.
    """
    inner = text.strip().strip("`").strip()
    tag, newline, rest = inner.partition("\n")
    if newline and tag.strip().lower() == "json":
        return rest
    return inner


def _decode_llm_json(content: object) -> dict:
    """Decode the assistant message *content* into a JSON object.

    Raises:
        ValueError: if *content* is not a string, is not valid JSON (even
            after removing a Markdown code fence), or decodes to something
            other than a JSON object. ``json.JSONDecodeError`` is a
            ``ValueError`` subclass, so callers need a single handler.
    """
    if not isinstance(content, str):
        # json.loads would raise TypeError here (e.g. content is null).
        raise ValueError(
            f"message content is {type(content).__name__}, expected str"
        )

    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as first_error:
        # Some models wrap JSON in markdown code blocks
        if not content.strip().startswith("```"):
            raise
        try:
            parsed = json.loads(_strip_code_fence(content))
        except json.JSONDecodeError:
            # Report the error from the untouched content, not the retry.
            raise first_error from None

    if not isinstance(parsed, dict):
        raise ValueError(
            f"expected a JSON object, got {type(parsed).__name__}"
        )
    return parsed


async def analyze_finding(finding_data: dict) -> dict | None:
    """Send a finding to the LLM for security analysis.

    Posts a chat-completion request to ``{config.llm_api_base}/chat/completions``
    while holding the module-level ``_llm_semaphore``, then decodes the first
    choice's message content as JSON.

    Args:
        finding_data: Finding fields used to build the prompt; its ``rule``
            value is also used in log messages.

    Returns:
        The decoded JSON object on success, or None when the API key is not
        configured, the request times out or fails, or the response cannot
        be decoded into a JSON object. Failures are logged, not raised.
    """
    if not config.llm_api_key:
        log.warning("LLM_API_KEY not set — skipping LLM analysis")
        return None

    rule = finding_data.get("rule")
    url = f"{config.llm_api_base.rstrip('/')}/chat/completions"
    headers = {
        "Authorization": f"Bearer {config.llm_api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": config.llm_model,
        "messages": [
            {"role": "system", "content": LLM_ANALYSIS_SYSTEM_PROMPT},
            {"role": "user", "content": _build_user_message(finding_data)},
        ],
        "temperature": LLM_DEFAULT_TEMPERATURE,
        "response_format": {"type": LLM_RESPONSE_FORMAT},
    }

    try:
        async with _llm_semaphore:
            async with httpx.AsyncClient(timeout=config.llm_timeout, headers=headers) as client:
                resp = await client.post(url, json=payload)
                resp.raise_for_status()
                body = resp.json()
    except httpx.TimeoutException:
        log.error(
            "LLM analysis timed out after %ds for rule=%s",
            config.llm_timeout,
            rule,
        )
        return None
    except Exception as e:
        # Best-effort boundary: any transport, HTTP-status or body-decoding
        # error means "no analysis" rather than a failure of the caller.
        log.warning("LLM analysis failed for rule=%s: %s", rule, e)
        return None

    try:
        content = body["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        # TypeError covers a body (or nested value) that is not subscriptable
        # in the expected way, e.g. a null "choices" field.
        log.warning(
            "LLM response parse error for rule=%s: %s — raw=%s",
            rule,
            e,
            str(body)[:200],
        )
        return None

    try:
        return _decode_llm_json(content)
    except ValueError as e:
        log.warning(
            "LLM response parse error for rule=%s: %s — raw=%s",
            rule,
            e,
            str(content)[:200],
        )
        return None
|