## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
(104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
build_package_list_query, get_dashboard_stats)
Убрано дублирование между api/*.py и web/routes.py (~90%)
- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants
## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
(htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious
## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
88 lines
2.6 KiB
Python
88 lines
2.6 KiB
Python
"""LLM analysis client for GuardDog findings.

Supports any OpenAI-compatible API endpoint with configurable model.
"""
|
|
|
|
import json
|
|
|
|
import httpx
|
|
|
|
from guarddog_nexus.config import config
|
|
from guarddog_nexus.constants import LLM_ANALYSIS_SYSTEM_PROMPT
|
|
from guarddog_nexus.logging_setup import log
|
|
|
|
|
|
def _build_user_message(finding: dict) -> str:
    """Build a concise user prompt from a finding's data.

    Args:
        finding: Mapping describing one finding. The keys read are
            ``rule``, ``severity``, ``message``, ``location`` and ``code``;
            every one of them is optional.

    Returns:
        The prompt text: rule, severity and message lines, then an optional
        location line and fenced code snippet, then the instruction asking
        for a JSON answer.
    """
    # ``or`` instead of a ``.get`` default: a key that is present but holds
    # ``None`` must read as "unknown"/empty, not as the literal text "None".
    rule = finding.get("rule") or "unknown"
    severity = finding.get("severity") or "unknown"
    message = finding.get("message") or ""
    location = finding.get("location") or ""
    code = finding.get("code") or ""

    parts = [
        f"Rule: {rule}\n",
        f"Severity: {severity}\n",
        f"Message: {message}\n",
    ]
    if location:
        parts.append(f"Location: {location}\n")
    if code:
        snippet = str(code)
        # The snippet comes from the scanned package and is untrusted. Grow
        # the fence until it no longer occurs in the snippet, so the snippet
        # cannot close the code block early and add text outside of it.
        fence = "```"
        while fence in snippet:
            fence += "`"
        parts.append(f"Code snippet:\n{fence}\n{snippet}\n{fence}\n")

    parts.append(
        "\nAnalyse this finding and return JSON with keys: "
        "verdict, summary, analysis, severity_rating."
    )
    return "".join(parts)
|
|
|
|
|
|
async def analyze_finding(finding_data: dict) -> dict | None:
    """Send a finding to the LLM for security analysis.

    Posts a chat-completions request to ``config.llm_api_base`` using
    ``config.llm_model`` and parses the first choice's message content as
    JSON.

    Args:
        finding_data: Finding fields passed to ``_build_user_message``; the
            ``rule`` key is also used in log messages.

    Returns:
        The parsed JSON object as a dict on success. ``None`` when no API
        key is configured, when the request fails or times out, or when the
        response cannot be parsed into a JSON object.
    """
    if not config.llm_api_key:
        log.warning("LLM_API_KEY not set — skipping LLM analysis")
        return None

    rule = finding_data.get("rule")
    url = f"{config.llm_api_base.rstrip('/')}/chat/completions"
    headers = {
        "Authorization": f"Bearer {config.llm_api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": config.llm_model,
        "messages": [
            {"role": "system", "content": LLM_ANALYSIS_SYSTEM_PROMPT},
            {"role": "user", "content": _build_user_message(finding_data)},
        ],
        "temperature": 0.3,
        "response_format": {"type": "json_object"},
    }

    try:
        async with httpx.AsyncClient(
            timeout=config.llm_timeout, headers=headers
        ) as client:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            body = resp.json()
    except httpx.TimeoutException:
        log.error(
            "LLM analysis timed out after %ds for rule=%s",
            config.llm_timeout,
            rule,
        )
        return None
    except Exception as e:
        # Best-effort boundary: any transport, HTTP-status or body-decoding
        # failure is logged and reported to the caller as "no report".
        log.warning("LLM analysis failed for rule=%s: %s", rule, e)
        return None

    try:
        content = body["choices"][0]["message"]["content"]
        report = json.loads(content)
    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
        # TypeError covers ``"content": null`` and a body that is not a
        # mapping/sequence of the expected shape; without it the error would
        # escape instead of yielding None.
        log.warning("LLM response parse error for rule=%s: %s", rule, e)
        return None

    if not isinstance(report, dict):
        # Valid JSON that is not an object (list, string, number) does not
        # satisfy the declared ``dict | None`` return type.
        log.warning(
            "LLM response parse error for rule=%s: %s",
            rule,
            f"expected a JSON object, got {type(report).__name__}",
        )
        return None
    return report
|