refactor: вынос хардкода + LLM-анализ finding'ов

## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
  (104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
  build_package_list_query, get_dashboard_stats)
  Убрана дупликация между api/*.py и web/routes.py (~90%)

- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
  log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
  из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
  из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
  константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants

## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
  (htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious

## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
Marker689
2026-05-10 04:37:07 +03:00
parent c43e7c4c9b
commit 834138368a
21 changed files with 1094 additions and 476 deletions

View File

@@ -1,38 +1,33 @@
"""Web UI routes — Jinja2 + htmx pages."""
import datetime
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from jinja2 import Environment, PackageLoader, select_autoescape
from sqlalchemy import Integer, cast, func, select, text
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.constants import (
APP_PACKAGE,
DEFAULT_SORT_BY_PACKAGES,
DEFAULT_SORT_BY_SCANS,
DEFAULT_SORT_DIR,
WEB_PER_PAGE,
)
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
from guarddog_nexus.queries import (
build_package_list_query,
build_scan_list_query,
get_dashboard_stats,
)
router = APIRouter(tags=["web"])
_jinja_env = Environment(
loader=PackageLoader("guarddog_nexus", "web/templates"),
loader=PackageLoader(APP_PACKAGE, "web/templates"),
autoescape=select_autoescape(),
)
SCAN_SORT_FIELDS = {
"id": Scan.id,
"package_name": Scan.package_name,
"started_at": Scan.started_at,
"status": Scan.status,
"total_findings": Scan.total_findings,
}
PACKAGE_SORT_FIELDS = {
"name": Scan.package_name,
"last_scanned_at": Scan.started_at,
"total_findings": Scan.total_findings,
"flagged": Scan.flagged,
}
def _render(name: str, **context) -> HTMLResponse:
template = _jinja_env.get_template(name)
@@ -41,113 +36,16 @@ def _render(name: str, **context) -> HTMLResponse:
@router.get("/", response_class=HTMLResponse)
async def dashboard(request: Request, session: AsyncSession = Depends(get_session)):
ctx = await _dashboard_data(session)
ctx = await get_dashboard_stats(session)
return _render("dashboard.html", **ctx, request=request)
@router.get("/dashboard/stats", response_class=HTMLResponse)
async def dashboard_stats_fragment(session: AsyncSession = Depends(get_session)):
ctx = await _dashboard_data(session)
ctx = await get_dashboard_stats(session)
return _render("dashboard_stats.html", **ctx)
async def _dashboard_data(session: AsyncSession) -> dict:
total_scans = await session.scalar(select(func.count(Scan.id)))
flagged_scans = await session.scalar(select(func.count(Scan.id)).where(Scan.flagged == True))
recent_flagged = await session.scalar(
select(func.count(Scan.id)).where(
Scan.flagged == True,
Scan.started_at >= func.datetime("now", "-7 days"),
)
)
total_findings = await session.scalar(select(func.count(Finding.id)))
warnings_count = await session.scalar(
select(func.count(Finding.id)).where(
func.json_extract(Finding.data, "$.severity") == "WARNING"
)
)
errors_count = await session.scalar(
select(func.count(Finding.id)).where(
func.json_extract(Finding.data, "$.severity") == "ERROR"
)
)
latest_flagged = (
(
await session.execute(
select(Scan).where(Scan.flagged == True).order_by(Scan.started_at.desc()).limit(8)
)
)
.scalars()
.all()
)
latest_scans = (
(await session.execute(select(Scan).order_by(Scan.started_at.desc()).limit(10)))
.scalars()
.all()
)
top_rules = (
await session.execute(
select(
func.json_extract(Finding.data, "$.rule").label("rule"),
func.count(Finding.id).label("cnt"),
)
.group_by(text("rule"))
.order_by(text("cnt DESC"))
.limit(10)
)
).all()
most_flagged = (
await session.execute(
select(
Scan.package_name,
Scan.package_version,
func.sum(Scan.total_findings).label("total"),
func.max(Scan.started_at).label("last_scan"),
)
.where(Scan.flagged == True)
.group_by(Scan.package_name, Scan.package_version)
.order_by(func.sum(Scan.total_findings).desc())
.limit(8)
)
).all()
max_findings = max((r.total for r in most_flagged), default=1)
days_raw = (
await session.execute(
select(
func.date(Scan.started_at).label("day"),
func.count(Scan.id).label("cnt"),
func.sum(cast(Scan.flagged, Integer)).label("flagged_cnt"),
)
.where(Scan.started_at >= func.datetime("now", "-14 days"))
.group_by("day")
.order_by("day")
)
).all()
return {
"total_scans": total_scans or 0,
"flagged_scans": flagged_scans or 0,
"recent_flagged": recent_flagged or 0,
"total_findings": total_findings or 0,
"warnings_count": warnings_count or 0,
"errors_count": errors_count or 0,
"latest_flagged": latest_flagged,
"latest_scans": latest_scans,
"top_rules": [(r.rule, r.cnt) for r in top_rules],
"most_flagged": most_flagged,
"max_findings": max_findings,
"days": [(d.day, d.cnt, d.flagged_cnt) for d in days_raw],
"now": datetime.datetime.now(datetime.timezone.utc),
}
@router.get("/scans", response_class=HTMLResponse)
async def scans_list(
request: Request,
@@ -155,32 +53,26 @@ async def scans_list(
flagged: str = "",
search: str = "",
status: str = "",
sort_by: str = "started_at",
sort_dir: str = "desc",
sort_by: str = DEFAULT_SORT_BY_SCANS,
sort_dir: str = DEFAULT_SORT_DIR,
session: AsyncSession = Depends(get_session),
):
per_page = 50
per_page = WEB_PER_PAGE
offset = (page - 1) * per_page
count_q = select(func.count(Scan.id))
q = select(Scan)
flagged_bool = None
if flagged == "1":
q = q.where(Scan.flagged == True)
count_q = count_q.where(Scan.flagged == True)
if status:
q = q.where(Scan.status == status)
count_q = count_q.where(Scan.status == status)
if search:
pattern = f"%{search}%"
condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
q = q.where(condition)
count_q = count_q.where(condition)
sort_field = SCAN_SORT_FIELDS.get(sort_by, Scan.started_at)
q = q.order_by(sort_field.desc() if sort_dir == "desc" else sort_field.asc())
q = q.offset(offset).limit(per_page)
flagged_bool = True
q, count_q = build_scan_list_query(
flagged=flagged_bool,
status=status or None,
search=search or None,
sort_by=sort_by,
sort_dir=sort_dir,
limit=per_page,
offset=offset,
)
scans = (await session.execute(q)).scalars().all()
total = await session.scalar(count_q)
@@ -200,11 +92,15 @@ async def scans_list(
@router.get("/scans/{scan_id}", response_class=HTMLResponse)
async def scan_detail(scan_id: int, request: Request, session: AsyncSession = Depends(get_session)):
async def scan_detail(
scan_id: int, request: Request, session: AsyncSession = Depends(get_session)
):
from sqlalchemy.orm import selectinload
scan = await session.scalar(
select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings))
select(Scan)
.where(Scan.id == scan_id)
.options(selectinload(Scan.findings))
)
if not scan:
return HTMLResponse("<h1>Not found</h1>", status_code=404)
@@ -218,45 +114,27 @@ async def packages_list(
page: int = 1,
flagged: str = "",
search: str = "",
sort_by: str = "last_scanned_at",
sort_dir: str = "desc",
sort_by: str = DEFAULT_SORT_BY_PACKAGES,
sort_dir: str = DEFAULT_SORT_DIR,
session: AsyncSession = Depends(get_session),
):
per_page = 50
per_page = WEB_PER_PAGE
offset = (page - 1) * per_page
subq = select(
Scan.package_name.label("pkg_name"),
Scan.package_version.label("pkg_ver"),
Scan.ecosystem,
Scan.repository,
func.max(Scan.started_at).label("last_scan"),
func.max(Scan.flagged).label("is_flagged"),
func.sum(Scan.total_findings).label("findings_sum"),
func.max(Scan.id).label("sid"),
).group_by(Scan.package_name, Scan.package_version)
flagged_bool = None
if flagged == "1":
subq = subq.having(func.max(Scan.flagged) == True)
if search:
pattern = f"%{search}%"
subq = subq.where(
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
)
flagged_bool = True
sort_field = PACKAGE_SORT_FIELDS.get(sort_by, Scan.started_at)
sort_col = func.max(sort_field)
subq = subq.order_by(
sort_col.desc() if sort_dir == "desc" else sort_col.asc()
rows_q, total_q = build_package_list_query(
flagged=flagged_bool,
search=search or None,
sort_by=sort_by,
sort_dir=sort_dir,
limit=per_page,
offset=offset,
)
sq = subq.subquery()
total = await session.scalar(select(func.count()).select_from(sq))
rows = (
await session.execute(
select(sq).offset(offset).limit(per_page)
)
).all()
total = await session.scalar(total_q)
rows = (await session.execute(rows_q)).all()
return _render(
"packages_list.html",
@@ -309,3 +187,36 @@ async def package_detail(
findings=all_findings,
request=request,
)
@router.post("/api/v1/findings/{finding_id}/analyze", response_class=HTMLResponse)
async def analyze_finding_htmx(
finding_id: int,
session: AsyncSession = Depends(get_session),
):
"""HTMX fragment: trigger LLM analysis and return styled result HTML."""
from guarddog_nexus.config import config
from guarddog_nexus.llm import analyze_finding
if not config.llm_enabled:
return HTMLResponse(
'<div class="llm-actions"><small class="flagged">LLM analysis is disabled</small></div>'
)
finding = await session.scalar(select(Finding).where(Finding.id == finding_id))
if not finding:
return HTMLResponse(
'<div class="llm-actions"><small class="flagged">Finding not found</small></div>',
status_code=404,
)
report = await analyze_finding(finding.data)
if report is None:
return HTMLResponse(
'<div class="llm-actions"><small class="flagged">LLM analysis failed</small></div>'
)
finding.report = report
await session.commit()
return _render("_llm_report_fragment.html", report=report)