Files
guarddog-nexus/guarddog_nexus/core/llm.py
Marker689 1341404568 fix: аудит — 19 фиксов безопасности, надёжности, UI и 16 новых тестов
- S4: bump jinja2>=3.1.4, python-multipart>=0.0.18, httpx>=0.28.0
- S5: _detect_ecosystem — DEFAULT_ECOSYSTEM для неизвестных форматов
- S6: harvester — log.exception() вместо log.error()
- S8: _scan_component — urlencode параметров
- P1: scanner — proc.kill() при таймауте
- P3: api_packages — selectinload(Scan.findings), убран N+1
- P4+P5: утечка _url_locks и _llm_locks при early return
- P6: DB reaper — сброс {'status':'analyzing'} при старте
- UI: htmx-пагинация, фильтры не теряют flagged, 404 с layout
- UI: мобильные таблицы overflow-x, полная стата на дашборде
- UI: i18n статусов в _status_badge, urlencode package_name
- 16 новых тестов: analyze endpoint (6), scanner errors (4),
  webhook signature (2), llm client (4)
2026-05-10 10:45:44 +03:00

105 lines
3.4 KiB
Python

"""LLM analysis client for GuardDog findings.
Supports any OpenAI-compatible API endpoint with configurable model.
"""
import asyncio
import json
import httpx
from ..config import config
from ..constants import LLM_ANALYSIS_SYSTEM_PROMPT, LLM_DEFAULT_TEMPERATURE, LLM_RESPONSE_FORMAT
from ..logging_setup import log
# Module-wide limiter for LLM requests: analyze_finding() holds this semaphore
# for the duration of its HTTP call, so at most config.llm_max_concurrent
# requests are in flight at once.
_llm_semaphore = asyncio.Semaphore(config.llm_max_concurrent)
def _build_user_message(finding: dict) -> str:
"""Build a concise prompt from a finding's data."""
rule = finding.get("rule", "unknown")
severity = finding.get("severity", "unknown")
message = finding.get("message", "")
location = finding.get("location", "")
code = finding.get("code", "")
prompt = f"Rule: {rule}\nSeverity: {severity}\nMessage: {message}\n"
if location:
prompt += f"Location: {location}\n"
if code:
prompt += f"Code snippet:\n```\n{code}\n```\n"
prompt += (
"\nAnalyse this finding and return JSON with keys: "
"verdict, summary, analysis, severity_rating."
)
return prompt
def _parse_llm_content(content: object) -> dict:
    """Decode the assistant message content into a JSON object.

    Plain JSON text is tried first. If that fails and the text is wrapped in
    a markdown code fence (``` or ```json, which some models emit even when a
    JSON response format is requested), the backticks and an optional "json"
    language tag are removed and decoding is retried.

    Args:
        content: The ``content`` value taken from the first choice's message.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If ``content`` is not a string, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or
            decodes to something other than a JSON object.
    """
    if not isinstance(content, str):
        # json.loads(None) would raise TypeError; surface it as a parse error
        # so the caller's single ValueError handler covers it.
        raise ValueError(f"message content is {type(content).__name__}, not str")

    try:
        parsed = json.loads(content)
    except json.JSONDecodeError:
        text = content.strip()
        if not text.startswith("```"):
            raise
        unfenced = text.strip("`").strip()
        # Valid JSON never starts with the letters "json", so dropping the
        # tag case-insensitively (and regardless of the line ending that
        # follows it) cannot damage a real payload.
        if unfenced[:4].lower() == "json":
            unfenced = unfenced[4:]
        parsed = json.loads(unfenced)

    if not isinstance(parsed, dict):
        raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
    return parsed


async def analyze_finding(finding_data: dict) -> dict | None:
    """Send a finding to the LLM for security analysis.

    Posts a chat-completions request to ``config.llm_api_base`` while holding
    the module-level semaphore, then decodes the first choice's message
    content as JSON.

    Args:
        finding_data: The finding to analyse; passed to
            ``_build_user_message`` to produce the user prompt.

    Returns:
        The parsed JSON object on success. ``None`` when no API key is
        configured, the request times out or fails, or the response does not
        contain a JSON object. Failures are logged, never raised.
    """
    if not config.llm_api_key:
        log.warning("LLM_API_KEY not set — skipping LLM analysis")
        return None

    rule = finding_data.get("rule")
    url = f"{config.llm_api_base.rstrip('/')}/chat/completions"
    headers = {
        "Authorization": f"Bearer {config.llm_api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": config.llm_model,
        "messages": [
            {"role": "system", "content": LLM_ANALYSIS_SYSTEM_PROMPT},
            {"role": "user", "content": _build_user_message(finding_data)},
        ],
        "temperature": LLM_DEFAULT_TEMPERATURE,
        "response_format": {"type": LLM_RESPONSE_FORMAT},
    }

    try:
        async with _llm_semaphore:
            async with httpx.AsyncClient(timeout=config.llm_timeout, headers=headers) as client:
                resp = await client.post(url, json=payload)
                resp.raise_for_status()
                body = resp.json()
    except httpx.TimeoutException:
        log.error(
            "LLM analysis timed out after %ds for rule=%s",
            config.llm_timeout,
            rule,
        )
        return None
    except Exception as e:
        # Deliberately broad: LLM analysis is best-effort, so any transport,
        # HTTP-status or body-decoding failure degrades to "no analysis".
        log.warning("LLM analysis failed for rule=%s: %s", rule, e)
        return None

    try:
        content = body["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        # TypeError covers a body that is not a mapping, or "choices"/"message"
        # being null, which would otherwise escape to the caller.
        log.warning(
            "LLM response parse error for rule=%s: %s — raw=%s",
            rule,
            e,
            str(body)[:200],
        )
        return None

    try:
        return _parse_llm_content(content)
    except ValueError as e:
        log.warning(
            "LLM response parse error for rule=%s: %s — raw=%s",
            rule,
            e,
            str(content)[:200],
        )
        return None