refactor: вынос хардкода + LLM-анализ finding'ов

## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
  (104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
  build_package_list_query, get_dashboard_stats)
  Убрано дублирование между api/*.py и web/routes.py (~90%)

- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
  log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
  из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
  из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
  константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants

## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
  (htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious

## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
Marker689
2026-05-10 04:37:07 +03:00
parent c43e7c4c9b
commit 834138368a
21 changed files with 1094 additions and 476 deletions

View File

@@ -1,38 +1,33 @@
"""Web UI routes — Jinja2 + htmx pages."""
import datetime
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from jinja2 import Environment, PackageLoader, select_autoescape
from sqlalchemy import Integer, cast, func, select, text
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.constants import (
APP_PACKAGE,
DEFAULT_SORT_BY_PACKAGES,
DEFAULT_SORT_BY_SCANS,
DEFAULT_SORT_DIR,
WEB_PER_PAGE,
)
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
from guarddog_nexus.queries import (
build_package_list_query,
build_scan_list_query,
get_dashboard_stats,
)
router = APIRouter(tags=["web"])
_jinja_env = Environment(
loader=PackageLoader("guarddog_nexus", "web/templates"),
loader=PackageLoader(APP_PACKAGE, "web/templates"),
autoescape=select_autoescape(),
)
SCAN_SORT_FIELDS = {
"id": Scan.id,
"package_name": Scan.package_name,
"started_at": Scan.started_at,
"status": Scan.status,
"total_findings": Scan.total_findings,
}
PACKAGE_SORT_FIELDS = {
"name": Scan.package_name,
"last_scanned_at": Scan.started_at,
"total_findings": Scan.total_findings,
"flagged": Scan.flagged,
}
def _render(name: str, **context) -> HTMLResponse:
template = _jinja_env.get_template(name)
@@ -41,113 +36,16 @@ def _render(name: str, **context) -> HTMLResponse:
@router.get("/", response_class=HTMLResponse)
async def dashboard(request: Request, session: AsyncSession = Depends(get_session)):
ctx = await _dashboard_data(session)
ctx = await get_dashboard_stats(session)
return _render("dashboard.html", **ctx, request=request)
@router.get("/dashboard/stats", response_class=HTMLResponse)
async def dashboard_stats_fragment(session: AsyncSession = Depends(get_session)):
ctx = await _dashboard_data(session)
ctx = await get_dashboard_stats(session)
return _render("dashboard_stats.html", **ctx)
async def _dashboard_data(session: AsyncSession) -> dict:
total_scans = await session.scalar(select(func.count(Scan.id)))
flagged_scans = await session.scalar(select(func.count(Scan.id)).where(Scan.flagged == True))
recent_flagged = await session.scalar(
select(func.count(Scan.id)).where(
Scan.flagged == True,
Scan.started_at >= func.datetime("now", "-7 days"),
)
)
total_findings = await session.scalar(select(func.count(Finding.id)))
warnings_count = await session.scalar(
select(func.count(Finding.id)).where(
func.json_extract(Finding.data, "$.severity") == "WARNING"
)
)
errors_count = await session.scalar(
select(func.count(Finding.id)).where(
func.json_extract(Finding.data, "$.severity") == "ERROR"
)
)
latest_flagged = (
(
await session.execute(
select(Scan).where(Scan.flagged == True).order_by(Scan.started_at.desc()).limit(8)
)
)
.scalars()
.all()
)
latest_scans = (
(await session.execute(select(Scan).order_by(Scan.started_at.desc()).limit(10)))
.scalars()
.all()
)
top_rules = (
await session.execute(
select(
func.json_extract(Finding.data, "$.rule").label("rule"),
func.count(Finding.id).label("cnt"),
)
.group_by(text("rule"))
.order_by(text("cnt DESC"))
.limit(10)
)
).all()
most_flagged = (
await session.execute(
select(
Scan.package_name,
Scan.package_version,
func.sum(Scan.total_findings).label("total"),
func.max(Scan.started_at).label("last_scan"),
)
.where(Scan.flagged == True)
.group_by(Scan.package_name, Scan.package_version)
.order_by(func.sum(Scan.total_findings).desc())
.limit(8)
)
).all()
max_findings = max((r.total for r in most_flagged), default=1)
days_raw = (
await session.execute(
select(
func.date(Scan.started_at).label("day"),
func.count(Scan.id).label("cnt"),
func.sum(cast(Scan.flagged, Integer)).label("flagged_cnt"),
)
.where(Scan.started_at >= func.datetime("now", "-14 days"))
.group_by("day")
.order_by("day")
)
).all()
return {
"total_scans": total_scans or 0,
"flagged_scans": flagged_scans or 0,
"recent_flagged": recent_flagged or 0,
"total_findings": total_findings or 0,
"warnings_count": warnings_count or 0,
"errors_count": errors_count or 0,
"latest_flagged": latest_flagged,
"latest_scans": latest_scans,
"top_rules": [(r.rule, r.cnt) for r in top_rules],
"most_flagged": most_flagged,
"max_findings": max_findings,
"days": [(d.day, d.cnt, d.flagged_cnt) for d in days_raw],
"now": datetime.datetime.now(datetime.timezone.utc),
}
@router.get("/scans", response_class=HTMLResponse)
async def scans_list(
request: Request,
@@ -155,32 +53,26 @@ async def scans_list(
flagged: str = "",
search: str = "",
status: str = "",
sort_by: str = "started_at",
sort_dir: str = "desc",
sort_by: str = DEFAULT_SORT_BY_SCANS,
sort_dir: str = DEFAULT_SORT_DIR,
session: AsyncSession = Depends(get_session),
):
per_page = 50
per_page = WEB_PER_PAGE
offset = (page - 1) * per_page
count_q = select(func.count(Scan.id))
q = select(Scan)
flagged_bool = None
if flagged == "1":
q = q.where(Scan.flagged == True)
count_q = count_q.where(Scan.flagged == True)
if status:
q = q.where(Scan.status == status)
count_q = count_q.where(Scan.status == status)
if search:
pattern = f"%{search}%"
condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
q = q.where(condition)
count_q = count_q.where(condition)
sort_field = SCAN_SORT_FIELDS.get(sort_by, Scan.started_at)
q = q.order_by(sort_field.desc() if sort_dir == "desc" else sort_field.asc())
q = q.offset(offset).limit(per_page)
flagged_bool = True
q, count_q = build_scan_list_query(
flagged=flagged_bool,
status=status or None,
search=search or None,
sort_by=sort_by,
sort_dir=sort_dir,
limit=per_page,
offset=offset,
)
scans = (await session.execute(q)).scalars().all()
total = await session.scalar(count_q)
@@ -200,11 +92,15 @@ async def scans_list(
@router.get("/scans/{scan_id}", response_class=HTMLResponse)
async def scan_detail(scan_id: int, request: Request, session: AsyncSession = Depends(get_session)):
async def scan_detail(
scan_id: int, request: Request, session: AsyncSession = Depends(get_session)
):
from sqlalchemy.orm import selectinload
scan = await session.scalar(
select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings))
select(Scan)
.where(Scan.id == scan_id)
.options(selectinload(Scan.findings))
)
if not scan:
return HTMLResponse("<h1>Not found</h1>", status_code=404)
@@ -218,45 +114,27 @@ async def packages_list(
page: int = 1,
flagged: str = "",
search: str = "",
sort_by: str = "last_scanned_at",
sort_dir: str = "desc",
sort_by: str = DEFAULT_SORT_BY_PACKAGES,
sort_dir: str = DEFAULT_SORT_DIR,
session: AsyncSession = Depends(get_session),
):
per_page = 50
per_page = WEB_PER_PAGE
offset = (page - 1) * per_page
subq = select(
Scan.package_name.label("pkg_name"),
Scan.package_version.label("pkg_ver"),
Scan.ecosystem,
Scan.repository,
func.max(Scan.started_at).label("last_scan"),
func.max(Scan.flagged).label("is_flagged"),
func.sum(Scan.total_findings).label("findings_sum"),
func.max(Scan.id).label("sid"),
).group_by(Scan.package_name, Scan.package_version)
flagged_bool = None
if flagged == "1":
subq = subq.having(func.max(Scan.flagged) == True)
if search:
pattern = f"%{search}%"
subq = subq.where(
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
)
flagged_bool = True
sort_field = PACKAGE_SORT_FIELDS.get(sort_by, Scan.started_at)
sort_col = func.max(sort_field)
subq = subq.order_by(
sort_col.desc() if sort_dir == "desc" else sort_col.asc()
rows_q, total_q = build_package_list_query(
flagged=flagged_bool,
search=search or None,
sort_by=sort_by,
sort_dir=sort_dir,
limit=per_page,
offset=offset,
)
sq = subq.subquery()
total = await session.scalar(select(func.count()).select_from(sq))
rows = (
await session.execute(
select(sq).offset(offset).limit(per_page)
)
).all()
total = await session.scalar(total_q)
rows = (await session.execute(rows_q)).all()
return _render(
"packages_list.html",
@@ -309,3 +187,36 @@ async def package_detail(
findings=all_findings,
request=request,
)
@router.post("/api/v1/findings/{finding_id}/analyze", response_class=HTMLResponse)
async def analyze_finding_htmx(
    finding_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Run LLM analysis for one finding and answer with an HTML fragment.

    The response is meant to be swapped into the page by htmx:

    * ``config.llm_enabled`` is false -> "LLM analysis is disabled" notice.
    * no ``Finding`` row with ``finding_id`` -> "Finding not found" notice
      with HTTP 404.
    * ``analyze_finding`` returns ``None`` -> "LLM analysis failed" notice.
    * otherwise the report is stored on ``finding.report``, the session is
      committed, and ``_llm_report_fragment.html`` is rendered with it.
    """
    # NOTE(review): the path lives under /api/v1/findings although this is the
    # web router; verify it does not shadow (or get shadowed by) the JSON API
    # route for the same path, depending on router registration order.
    from guarddog_nexus.config import config
    from guarddog_nexus.llm import analyze_finding

    disabled_html = (
        '<div class="llm-actions"><small class="flagged">LLM analysis is disabled</small></div>'
    )
    missing_html = (
        '<div class="llm-actions"><small class="flagged">Finding not found</small></div>'
    )
    failed_html = (
        '<div class="llm-actions"><small class="flagged">LLM analysis failed</small></div>'
    )

    if not config.llm_enabled:
        return HTMLResponse(disabled_html)

    lookup = select(Finding).where(Finding.id == finding_id)
    target = await session.scalar(lookup)
    if not target:
        # NOTE(review): htmx does not swap 4xx responses by default, so this
        # fragment may never reach the page - confirm the client-side config.
        return HTMLResponse(missing_html, status_code=404)

    llm_report = await analyze_finding(target.data)
    if llm_report is None:
        return HTMLResponse(failed_html)

    # Persist the report so later page loads show it without calling the LLM.
    target.report = llm_report
    await session.commit()
    return _render("_llm_report_fragment.html", report=llm_report)

View File

@@ -373,6 +373,49 @@ th.sortable.active .sort-icon {
justify-content: flex-end;
margin-bottom: 0.25rem;
}
/* LLM report: boxed block showing the stored or freshly returned LLM analysis
   of a finding (markup: <div class="llm-report"> in the templates). */
.llm-report {
margin-top: 0.75rem;
padding: 0.6rem 0.8rem;
background: var(--pico-color-gray-700);
border-radius: 6px;
font-size: 0.85rem;
line-height: 1.5;
border-left: 3px solid var(--pico-color-blue-400);
}
.llm-report strong {
color: var(--pico-color-blue-300);
}
/* Verdict label colours. The templates build the class name as
   "verdict-" + report.verdict, so only these three verdict values are styled. */
.verdict-safe {
color: var(--pico-color-green-400);
font-weight: bold;
}
.verdict-suspicious {
color: var(--pico-color-yellow-400);
font-weight: bold;
}
.verdict-malicious {
color: var(--pico-color-red-400);
font-weight: bold;
}
/* Container for the "Analyze with LLM" button and for the short notices the
   analyze endpoint returns in its place. */
.llm-actions {
margin-top: 0.5rem;
}
.llm-actions button {
font-size: 0.8rem;
}
/* htmx indicator.
   NOTE(review): an inline style="display:none" on the indicator element takes
   precedence over this rule, so the rule alone cannot reveal such an element. */
.htmx-indicator {
display: inline;
}
.toggle-all-btn {
font-size: 0.8rem;
margin-bottom: 0.5rem;

View File

@@ -0,0 +1,9 @@
{# LLM report fragment.
   Expects a `report` mapping with `verdict`, `summary` and `analysis`;
   `severity_rating` is optional and is omitted from the output when empty.
   The verdict value is also used as the suffix of the `verdict-*` CSS class. #}
<div class="llm-report">
<strong>LLM Analysis</strong>
<span class="verdict-{{ report.verdict }}">[{{ report.verdict }}]</span>
{% if report.severity_rating %}
<span class="severity-{{ report.severity_rating }}">({{ report.severity_rating }})</span>
{% endif %}
<p><em>{{ report.summary }}</em></p>
<p>{{ report.analysis }}</p>
</div>

View File

@@ -135,13 +135,13 @@
{% if top_rules %}
<div class="top-rules-chart">
<h3>Top Rules Triggered</h3>
{% for rule, cnt in top_rules %}
{% for r in top_rules %}
<div class="rule-bar-row">
<span class="rule-name" title="{{ rule }}"><code>{{ rule }}</code></span>
<span class="rule-name" title="{{ r.rule }}"><code>{{ r.rule }}</code></span>
<div class="rule-bar-container">
<div class="rule-bar" style="width: {{ (cnt / top_rules[0][1] * 100) | int if top_rules[0][1] > 0 else 0 }}%;"></div>
<div class="rule-bar" style="width: {{ (r.count / top_rules[0].count * 100) | int if top_rules[0].count > 0 else 0 }}%;"></div>
</div>
<span class="rule-count">{{ cnt }}</span>
<span class="rule-count">{{ r.count }}</span>
</div>
{% endfor %}
</div>

View File

@@ -57,6 +57,29 @@
</div>
<pre><code id="code-{{ f.id }}">{{ f.data.code }}</code></pre>
{% endif %}
{% if f.report %}
  {# Render the stored report through the shared fragment so it looks exactly
     like a fresh htmx response and skips the severity when it is missing. #}
  {% with report = f.report %}
    {% include "_llm_report_fragment.html" %}
  {% endwith %}
{% else %}
  <div class="llm-actions" id="llm-{{ f.id }}">
    <button class="outline"
            hx-post="/api/v1/findings/{{ f.id }}/analyze"
            hx-target="#llm-{{ f.id }}"
            hx-swap="outerHTML"
            hx-indicator="#llm-spinner-{{ f.id }}">
      {# No inline display:none here: htmx only toggles the htmx-request class,
         so an inline style would keep the spinner hidden during the request. #}
      <span id="llm-spinner-{{ f.id }}" class="htmx-indicator">
        <span class="spinner"></span>
      </span>
      Analyze with LLM
    </button>
  </div>
{% endif %}
</div>
</details>
{% endfor %}

View File

@@ -51,6 +51,29 @@
</div>
<pre><code id="code-{{ f.id }}">{{ f.data.code }}</code></pre>
{% endif %}
{% if f.report %}
  {# Render the stored report through the shared fragment so it looks exactly
     like a fresh htmx response and skips the severity when it is missing. #}
  {% with report = f.report %}
    {% include "_llm_report_fragment.html" %}
  {% endwith %}
{% else %}
  <div class="llm-actions" id="llm-{{ f.id }}">
    <button class="outline"
            hx-post="/api/v1/findings/{{ f.id }}/analyze"
            hx-target="#llm-{{ f.id }}"
            hx-swap="outerHTML"
            hx-indicator="#llm-spinner-{{ f.id }}">
      {# No inline display:none here: htmx only toggles the htmx-request class,
         so an inline style would keep the spinner hidden during the request. #}
      <span id="llm-spinner-{{ f.id }}" class="htmx-indicator">
        <span class="spinner"></span>
      </span>
      Analyze with LLM
    </button>
  </div>
{% endif %}
</div>
</details>
{% endfor %}