refactor: вынос хардкода + LLM-анализ finding'ов

## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
  (104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
  build_package_list_query, get_dashboard_stats)
  Убрана дупликация между api/*.py и web/routes.py (~90%)

- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
  log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
  из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
  из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
  константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants

## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
  (htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious

## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
Marker689
2026-05-10 04:37:07 +03:00
parent c43e7c4c9b
commit 834138368a
21 changed files with 1094 additions and 476 deletions

View File

@@ -4,64 +4,50 @@ import csv
import io
from fastapi import APIRouter, Depends, Query, Response
from sqlalchemy import func, select, text
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from guarddog_nexus.constants import (
CSV_MEDIA_TYPE,
DEFAULT_OFFSET,
DEFAULT_PAGE_SIZE,
DEFAULT_SORT_BY_SCANS,
DEFAULT_SORT_DIR,
MAX_PAGE_SIZE,
)
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
from guarddog_nexus.models import Scan
from guarddog_nexus.queries import build_scan_list_query, get_dashboard_stats
router = APIRouter(prefix="/api/v1/scans", tags=["scans"])
VALID_SORT_FIELDS = {
"id": Scan.id,
"package_name": Scan.package_name,
"started_at": Scan.started_at,
"status": Scan.status,
"total_findings": Scan.total_findings,
"flagged": Scan.flagged,
}
@router.get("")
async def list_scans(
limit: int = Query(50, le=200),
offset: int = Query(0, ge=0),
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
offset: int = Query(DEFAULT_OFFSET, ge=0),
flagged: bool | None = Query(None),
search: str | None = Query(None),
status: str | None = Query(None),
repository: str | None = Query(None),
sort_by: str = Query("started_at"),
sort_dir: str = Query("desc"),
sort_by: str = Query(DEFAULT_SORT_BY_SCANS),
sort_dir: str = Query(DEFAULT_SORT_DIR),
session: AsyncSession = Depends(get_session),
):
q = select(Scan)
count_q = select(func.count(Scan.id))
if flagged is not None:
q = q.where(Scan.flagged == flagged)
count_q = count_q.where(Scan.flagged == flagged)
if status:
q = q.where(Scan.status == status)
count_q = count_q.where(Scan.status == status)
if repository:
q = q.where(Scan.repository == repository)
count_q = count_q.where(Scan.repository == repository)
if search:
pattern = f"%{search}%"
condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
q = q.where(condition)
count_q = count_q.where(condition)
sort_field = VALID_SORT_FIELDS.get(sort_by, Scan.started_at)
sort_dir = "asc" if sort_dir.lower() == "asc" else "desc"
q = q.order_by(sort_field.desc() if sort_dir == "desc" else sort_field.asc())
q = q.offset(offset).limit(limit)
q, count_q = build_scan_list_query(
flagged=flagged,
status=status,
repository=repository,
search=search,
sort_by=sort_by,
sort_dir=sort_dir,
limit=limit,
offset=offset,
)
scans = (await session.execute(q)).scalars().all()
total = await session.scalar(count_q)
scans = (await session.execute(q)).scalars().all()
return {
"total": total,
"limit": limit,
@@ -92,78 +78,57 @@ async def export_scans_csv(
status: str | None = Query(None),
session: AsyncSession = Depends(get_session),
):
q = select(Scan)
if flagged is not None:
q = q.where(Scan.flagged == flagged)
if status:
q = q.where(Scan.status == status)
if search:
pattern = f"%{search}%"
q = q.where(
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
)
q = q.order_by(Scan.started_at.desc())
q, _count_q = build_scan_list_query(
flagged=flagged,
status=status,
search=search,
sort_by=DEFAULT_SORT_BY_SCANS,
sort_dir=DEFAULT_SORT_DIR,
limit=MAX_PAGE_SIZE,
offset=0,
)
scans = (await session.execute(q)).scalars().all()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow([
"id", "package_name", "package_version", "ecosystem", "repository",
"status", "total_findings", "flagged", "started_at", "finished_at",
"error_message", "sha256"
])
writer.writerow(
[
"id", "package_name", "package_version", "ecosystem", "repository",
"status", "total_findings", "flagged", "started_at", "finished_at",
"error_message", "sha256",
]
)
for s in scans:
writer.writerow([
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
s.status, s.total_findings, s.flagged,
s.started_at.isoformat() if s.started_at else "",
s.finished_at.isoformat() if s.finished_at else "",
s.error_message or "",
s.sha256 or "",
])
writer.writerow(
[
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
s.status, s.total_findings, s.flagged,
s.started_at.isoformat() if s.started_at else "",
s.finished_at.isoformat() if s.finished_at else "",
s.error_message or "",
s.sha256 or "",
]
)
return Response(
content=output.getvalue(),
media_type="text/csv",
media_type=CSV_MEDIA_TYPE,
headers={"Content-Disposition": "attachment; filename=scans_export.csv"},
)
@router.get("/stats")
async def scan_stats(session: AsyncSession = Depends(get_session)):
total_scans = await session.scalar(select(func.count(Scan.id)))
flagged_scans = await session.scalar(select(func.count(Scan.id)).where(Scan.flagged == True))
recent_flagged = await session.scalar(
select(func.count(Scan.id)).where(
Scan.flagged == True,
Scan.started_at >= func.datetime("now", "-7 days"),
)
)
total_findings = await session.scalar(select(func.count(Finding.id)))
top_rules = (
await session.execute(
select(
func.json_extract(Finding.data, "$.rule").label("rule"),
func.count(Finding.id).label("cnt"),
)
.group_by(text("rule"))
.order_by(text("cnt DESC"))
.limit(10)
)
).all()
latest_scan = await session.scalar(select(Scan).order_by(Scan.started_at.desc()).limit(1))
dashboard = await get_dashboard_stats(session)
return {
"total_scans": total_scans,
"flagged_scans": flagged_scans,
"recent_flagged": recent_flagged,
"total_findings": total_findings,
"top_rules": [{"rule": r.rule, "count": r.cnt} for r in top_rules],
"latest_scan_at": latest_scan.started_at.isoformat() if latest_scan else None,
"total_scans": dashboard["total_scans"],
"flagged_scans": dashboard["flagged_scans"],
"recent_flagged": dashboard["recent_flagged"],
"total_findings": dashboard["total_findings"],
"top_rules": dashboard["top_rules"],
"latest_scan_at": dashboard["latest_flagged"][0].started_at.isoformat()
if dashboard["latest_flagged"]
else None,
}
@@ -188,5 +153,5 @@ async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)):
"started_at": scan.started_at.isoformat() if scan.started_at else None,
"finished_at": scan.finished_at.isoformat() if scan.finished_at else None,
"error_message": scan.error_message,
"findings": [{"id": f.id, **f.data} for f in scan.findings],
"findings": [{"id": f.id, **f.data, "report": f.report} for f in scan.findings],
}