refactor: вынос хардкода + LLM-анализ finding'ов

## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
  (104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
  build_package_list_query, get_dashboard_stats)
  Убрана дупликация между api/*.py и web/routes.py (~90%)

- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
  log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
  из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
  из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
  константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants

## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
  (htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious

## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
Marker689
2026-05-10 04:37:07 +03:00
parent c43e7c4c9b
commit 834138368a
21 changed files with 1094 additions and 476 deletions

View File

@@ -4,71 +4,48 @@ import csv
import io
from fastapi import APIRouter, Depends, Query, Response
from sqlalchemy import func, select
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.constants import (
CSV_MEDIA_TYPE,
DEFAULT_OFFSET,
DEFAULT_PAGE_SIZE,
DEFAULT_SORT_BY_PACKAGES,
DEFAULT_SORT_DIR,
MAX_PAGE_SIZE,
)
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
from guarddog_nexus.queries import build_package_list_query
router = APIRouter(prefix="/api/v1/packages", tags=["packages"])
VALID_SORT_FIELDS = {
"name": Scan.package_name,
"version": Scan.package_version,
"last_scanned_at": Scan.started_at,
"total_findings": Scan.total_findings,
"flagged": Scan.flagged,
}
@router.get("")
async def list_packages(
limit: int = Query(50, le=200),
offset: int = Query(0, ge=0),
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
offset: int = Query(DEFAULT_OFFSET, ge=0),
ecosystem: str | None = Query(None),
flagged: bool | None = Query(None),
search: str | None = Query(None),
repository: str | None = Query(None),
sort_by: str = Query("last_scanned_at"),
sort_dir: str = Query("desc"),
sort_by: str = Query(DEFAULT_SORT_BY_PACKAGES),
sort_dir: str = Query(DEFAULT_SORT_DIR),
session: AsyncSession = Depends(get_session),
):
subq = select(
Scan.package_name,
Scan.package_version,
Scan.ecosystem,
Scan.repository,
func.max(Scan.started_at).label("last_scanned_at"),
func.max(Scan.flagged).label("is_flagged"),
func.sum(Scan.total_findings).label("total_findings"),
func.max(Scan.id).label("latest_scan_id"),
).group_by(Scan.package_name, Scan.package_version)
if ecosystem:
subq = subq.where(Scan.ecosystem == ecosystem)
if flagged is not None:
subq = subq.having(func.max(Scan.flagged) == flagged)
if repository:
subq = subq.where(Scan.repository == repository)
if search:
pattern = f"%{search}%"
subq = subq.where(
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
)
sort_field = VALID_SORT_FIELDS.get(sort_by, Scan.started_at)
sort_dir = "asc" if sort_dir.lower() == "asc" else "desc"
sort_col = func.max(sort_field)
subq = subq.order_by(
sort_col.desc() if sort_dir == "desc" else sort_col.asc()
rows_q, total_q = build_package_list_query(
flagged=flagged,
ecosystem=ecosystem,
repository=repository,
search=search,
sort_by=sort_by,
sort_dir=sort_dir,
limit=limit,
offset=offset,
)
total_q = select(func.count()).select_from(subq.subquery())
total = await session.scalar(total_q)
rows = (
await session.execute(subq.offset(offset).limit(limit))
).all()
rows = (await session.execute(rows_q)).all()
return {
"total": total,
@@ -76,14 +53,14 @@ async def list_packages(
"offset": offset,
"packages": [
{
"name": r.package_name,
"version": r.package_version,
"name": r.pkg_name,
"version": r.pkg_ver,
"ecosystem": r.ecosystem,
"repository": r.repository,
"last_scanned_at": r.last_scanned_at.isoformat() if r.last_scanned_at else None,
"last_scanned_at": r.last_scan.isoformat() if r.last_scan else None,
"flagged": bool(r.is_flagged),
"total_findings": r.total_findings,
"latest_scan_id": r.latest_scan_id,
"total_findings": r.findings_sum,
"latest_scan_id": r.sid,
}
for r in rows
],
@@ -96,44 +73,37 @@ async def export_packages_csv(
search: str | None = Query(None),
session: AsyncSession = Depends(get_session),
):
subq = select(
Scan.package_name,
Scan.package_version,
Scan.ecosystem,
Scan.repository,
func.max(Scan.started_at).label("last_scanned_at"),
func.max(Scan.flagged).label("is_flagged"),
func.sum(Scan.total_findings).label("total_findings"),
).group_by(Scan.package_name, Scan.package_version)
if flagged is not None:
subq = subq.having(func.max(Scan.flagged) == flagged)
if search:
pattern = f"%{search}%"
subq = subq.where(
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
)
subq = subq.order_by(func.max(Scan.started_at).desc())
rows = (await session.execute(subq)).all()
rows_q, _total_q = build_package_list_query(
flagged=flagged,
search=search,
sort_by=DEFAULT_SORT_BY_PACKAGES,
sort_dir=DEFAULT_SORT_DIR,
limit=MAX_PAGE_SIZE,
offset=0,
)
rows = (await session.execute(rows_q)).all()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow([
"name", "version", "ecosystem", "repository",
"last_scanned_at", "flagged", "total_findings"
])
writer.writerow(
[
"name", "version", "ecosystem", "repository",
"last_scanned_at", "flagged", "total_findings",
]
)
for r in rows:
writer.writerow([
r.package_name, r.package_version, r.ecosystem, r.repository,
r.last_scanned_at.isoformat() if r.last_scanned_at else "",
bool(r.is_flagged),
r.total_findings,
])
writer.writerow(
[
r.pkg_name, r.pkg_ver, r.ecosystem, r.repository,
r.last_scan.isoformat() if r.last_scan else "",
bool(r.is_flagged),
r.findings_sum,
]
)
return Response(
content=output.getvalue(),
media_type="text/csv",
media_type=CSV_MEDIA_TYPE,
headers={"Content-Disposition": "attachment; filename=packages_export.csv"},
)
@@ -162,10 +132,12 @@ async def get_package(
all_findings: list[dict] = []
for s in scans:
findings = (
(await session.execute(select(Finding).where(Finding.scan_id == s.id))).scalars().all()
(await session.execute(select(Finding).where(Finding.scan_id == s.id)))
.scalars()
.all()
)
for f in findings:
all_findings.append({"id": f.id, **f.data})
all_findings.append({"id": f.id, **f.data, "report": f.report})
return {
"name": scans[0].package_name,