refactor: вынос хардкода + LLM-анализ finding'ов

## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи (104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query, build_package_list_query, get_dashboard_stats). Убрано дублирование между api/*.py и web/routes.py (~90%)
- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary, log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py, константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants

## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM (htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious

## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants, где нужно
2026-05-10 04:37:07 +03:00
parent c43e7c4c9b
commit 834138368a
21 changed files with 1094 additions and 476 deletions
--- a/guarddog_nexus/llm.py
+++ b/guarddog_nexus/llm.py
@@ -0,0 +1,87 @@
+"""LLM analysis client for GuardDog findings.
+
+Supports any OpenAI-compatible API endpoint with configurable model.
+"""
+
+import json
+
+import httpx
+
+from guarddog_nexus.config import config
+from guarddog_nexus.constants import LLM_ANALYSIS_SYSTEM_PROMPT
+from guarddog_nexus.logging_setup import log
+
+
+def _build_user_message(finding: dict) -> str:
+    """Build the user-role prompt describing a single finding.
+
+    Args:
+        finding: Mapping that may contain the keys ``rule``, ``severity``,
+            ``message``, ``location`` and ``code``. A missing key and a key
+            stored as ``None`` are treated identically.
+
+    Returns:
+        The prompt text: one ``Name: value`` line per field, an optional
+        fenced code snippet, and a closing instruction asking for JSON
+        with the keys verdict, summary, analysis and severity_rating.
+    """
+
+    def field(key: str, default: str) -> object:
+        # ``dict.get(key, default)`` applies the default only when the key
+        # is absent; a key explicitly set to None would otherwise be
+        # rendered as the literal text "None" in the prompt.
+        value = finding.get(key)
+        return default if value is None else value
+
+    parts = [
+        f"Rule: {field('rule', 'unknown')}\n",
+        f"Severity: {field('severity', 'unknown')}\n",
+        f"Message: {field('message', '')}\n",
+    ]
+
+    location = finding.get("location")
+    if location:
+        parts.append(f"Location: {location}\n")
+
+    code = finding.get("code")
+    if code:
+        snippet = str(code)
+        # The snippet is untrusted text. Grow the fence until it no longer
+        # occurs inside the snippet, so a run of backticks in the snippet
+        # cannot close the fence early and spill into the instructions.
+        fence = "```"
+        while fence in snippet:
+            fence += "`"
+        parts.append(f"Code snippet:\n{fence}\n{snippet}\n{fence}\n")
+
+    parts.append(
+        "\nAnalyse this finding and return JSON with keys: "
+        "verdict, summary, analysis, severity_rating."
+    )
+    return "".join(parts)
+
+
+async def analyze_finding(finding_data: dict) -> dict | None:
+    """Send a finding to the LLM for security analysis.
+
+    Posts a chat-completion request to
+    ``{config.llm_api_base}/chat/completions`` and parses the first
+    choice's message content as JSON.
+
+    Args:
+        finding_data: Finding fields passed to ``_build_user_message``;
+            its ``rule`` value is also used in log messages.
+
+    Returns:
+        The parsed JSON object as a dict on success. ``None`` when no API
+        key is configured, when the request fails or times out, or when
+        the response cannot be parsed into a JSON object. This function
+        is best-effort and does not raise for those failures.
+    """
+    if not config.llm_api_key:
+        log.warning("LLM_API_KEY not set — skipping LLM analysis")
+        return None
+
+    rule = finding_data.get("rule")
+    url = f"{config.llm_api_base.rstrip('/')}/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {config.llm_api_key}",
+        "Content-Type": "application/json",
+    }
+    payload = {
+        "model": config.llm_model,
+        "messages": [
+            {"role": "system", "content": LLM_ANALYSIS_SYSTEM_PROMPT},
+            {"role": "user", "content": _build_user_message(finding_data)},
+        ],
+        "temperature": 0.3,
+        "response_format": {"type": "json_object"},
+    }
+
+    try:
+        async with httpx.AsyncClient(
+            timeout=config.llm_timeout, headers=headers
+        ) as client:
+            resp = await client.post(url, json=payload)
+            resp.raise_for_status()
+            body = resp.json()
+    except httpx.TimeoutException:
+        # %s rather than %d: the timeout may be fractional or a non-numeric
+        # timeout object, which %d would truncate or fail to format.
+        log.error(
+            "LLM analysis timed out after %ss for rule=%s",
+            config.llm_timeout,
+            rule,
+        )
+        return None
+    except Exception as e:
+        # Deliberately broad: any transport, HTTP-status or body-decoding
+        # failure is logged and reported to the caller as "no report".
+        log.warning("LLM analysis failed for rule=%s: %s", rule, e)
+        return None
+
+    try:
+        content = body["choices"][0]["message"]["content"]
+        report = json.loads(content)
+    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
+        # TypeError covers a null ``content`` (json.loads rejects None) and
+        # a response body that is not shaped like nested dicts/lists.
+        log.warning("LLM response parse error for rule=%s: %s", rule, e)
+        return None
+
+    if not isinstance(report, dict):
+        # Valid JSON that is not an object (e.g. a list or a bare string)
+        # does not satisfy the declared ``dict | None`` return contract.
+        log.warning(
+            "LLM response parse error for rule=%s: expected JSON object, got %s",
+            rule,
+            type(report).__name__,
+        )
+        return None
+    return report