refactor: вынос хардкода + LLM-анализ finding'ов
## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
(104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
build_package_list_query, get_dashboard_stats)
Убрано дублирование кода между api/*.py и web/routes.py (~90%)
- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants
## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
(htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious
## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
87
guarddog_nexus/llm.py
Normal file
87
guarddog_nexus/llm.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""LLM analysis client for GuardDog findings.
|
||||
|
||||
Supports any OpenAI-compatible API endpoint with configurable model.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import LLM_ANALYSIS_SYSTEM_PROMPT
|
||||
from guarddog_nexus.logging_setup import log
|
||||
|
||||
|
||||
def _build_user_message(finding: dict) -> str:
|
||||
"""Build a concise prompt from a finding's data."""
|
||||
rule = finding.get("rule", "unknown")
|
||||
severity = finding.get("severity", "unknown")
|
||||
message = finding.get("message", "")
|
||||
location = finding.get("location", "")
|
||||
code = finding.get("code", "")
|
||||
|
||||
prompt = (
|
||||
f"Rule: {rule}\n"
|
||||
f"Severity: {severity}\n"
|
||||
f"Message: {message}\n"
|
||||
)
|
||||
if location:
|
||||
prompt += f"Location: {location}\n"
|
||||
if code:
|
||||
prompt += f"Code snippet:\n```\n{code}\n```\n"
|
||||
|
||||
prompt += (
|
||||
"\nAnalyse this finding and return JSON with keys: "
|
||||
"verdict, summary, analysis, severity_rating."
|
||||
)
|
||||
return prompt
|
||||
|
||||
|
||||
async def analyze_finding(finding_data: dict) -> dict | None:
    """Send a finding to the LLM for security analysis.

    Posts a chat-completion request (system prompt plus the message built by
    ``_build_user_message``) to ``{config.llm_api_base}/chat/completions`` and
    decodes the first choice's message content as JSON.

    Args:
        finding_data: Mapping describing the finding; passed to
            ``_build_user_message`` and its ``rule`` key is used in log lines.

    Returns:
        The decoded JSON object as a dict, or ``None`` when the API key is
        not configured, the request times out or fails, or the response
        cannot be parsed into a JSON object. Failures are logged, never
        raised.
    """
    if not config.llm_api_key:
        log.warning("LLM_API_KEY not set — skipping LLM analysis")
        return None

    rule = finding_data.get("rule")
    url = f"{config.llm_api_base.rstrip('/')}/chat/completions"
    headers = {
        "Authorization": f"Bearer {config.llm_api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": config.llm_model,
        "messages": [
            {"role": "system", "content": LLM_ANALYSIS_SYSTEM_PROMPT},
            {"role": "user", "content": _build_user_message(finding_data)},
        ],
        "temperature": 0.3,
        "response_format": {"type": "json_object"},
    }

    try:
        async with httpx.AsyncClient(timeout=config.llm_timeout, headers=headers) as client:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            body = resp.json()
    except httpx.TimeoutException:
        log.error(
            "LLM analysis timed out after %ds for rule=%s",
            config.llm_timeout,
            rule,
        )
        return None
    except Exception as e:
        # Best-effort boundary: any transport, HTTP-status or body-decoding
        # error must not propagate to the caller.
        log.warning("LLM analysis failed for rule=%s: %s", rule, e)
        return None

    try:
        content = body["choices"][0]["message"]["content"]
        report = json.loads(content)
    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
        # TypeError covers a null ``content`` (json.loads(None)) and a body or
        # choice that is not subscriptable the way the lookup above expects.
        log.warning("LLM response parse error for rule=%s: %s", rule, e)
        return None

    if not isinstance(report, dict):
        # Valid JSON that is not an object (list, string, number, ...) would
        # break the ``dict | None`` contract callers rely on.
        log.warning(
            "LLM response parse error for rule=%s: %s",
            rule,
            f"expected a JSON object, got {type(report).__name__}",
        )
        return None
    return report
|
||||
Reference in New Issue
Block a user