refactor: вынос хардкода + LLM-анализ finding'ов

## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
  (104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
  build_package_list_query, get_dashboard_stats)
  Убрано дублирование между api/*.py и web/routes.py (~90%)

- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
  log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
  из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
  из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
  константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants

## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
  (htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious

## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants там, где это нужно
This commit is contained in:
Marker689
2026-05-10 04:37:07 +03:00
parent c43e7c4c9b
commit 834138368a
21 changed files with 1094 additions and 476 deletions

View File

@@ -4,6 +4,14 @@ from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.config import config
from guarddog_nexus.constants import (
DEFAULT_OFFSET,
DEFAULT_PAGE_SIZE,
JSON_PATH_RULE,
JSON_PATH_SEVERITY,
MAX_PAGE_SIZE,
)
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding
@@ -12,8 +20,8 @@ router = APIRouter(prefix="/api/v1/findings", tags=["findings"])
@router.get("")
async def list_findings(
limit: int = Query(50, le=200),
offset: int = Query(0, ge=0),
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
offset: int = Query(DEFAULT_OFFSET, ge=0),
rule: str | None = Query(None),
severity: str | None = Query(None),
scan_id: int | None = Query(None),
@@ -21,9 +29,9 @@ async def list_findings(
):
q = select(Finding)
if rule:
q = q.where(func.json_extract(Finding.data, "$.rule") == rule)
q = q.where(func.json_extract(Finding.data, JSON_PATH_RULE) == rule)
if severity:
q = q.where(func.json_extract(Finding.data, "$.severity") == severity)
q = q.where(func.json_extract(Finding.data, JSON_PATH_SEVERITY) == severity)
if scan_id:
q = q.where(Finding.scan_id == scan_id)
@@ -39,8 +47,40 @@ async def list_findings(
"id": f.id,
"scan_id": f.scan_id,
**f.data,
"report": f.report,
"created_at": f.created_at.isoformat() if f.created_at else None,
}
for f in findings
],
}
@router.post("/{finding_id}/analyze")
async def analyze_finding_endpoint(
    finding_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Manually trigger LLM analysis for a single finding.

    Looks up the finding by primary key, passes its ``data`` payload to
    ``guarddog_nexus.llm.analyze_finding`` and, on success, stores the
    returned report on the finding and commits the session.

    Args:
        finding_id: Primary key of the finding to analyze.
        session: Database session injected by FastAPI.

    Returns:
        A dict with the finding ``id``, the keys of ``finding.data`` and the
        freshly produced ``report``.

    Raises:
        HTTPException: 503 when LLM analysis is disabled in the config,
            404 when no finding has the given id, 502 when the analyzer
            returns ``None``. The response body keeps the ``{"detail": ...}``
            shape, so only the status code differs from a plain dict return.
    """
    # Function-local import keeps this block self-contained; the module-level
    # fastapi import visible in this file does not include HTTPException.
    from fastapi import HTTPException

    if not config.llm_enabled:
        raise HTTPException(status_code=503, detail="LLM analysis is disabled")

    finding = await session.scalar(
        select(Finding).where(Finding.id == finding_id)
    )
    if finding is None:
        raise HTTPException(status_code=404, detail="Not found")

    # Imported lazily, exactly where the original code imported it.
    from guarddog_nexus.llm import analyze_finding

    report = await analyze_finding(finding.data)
    if report is None:
        # The analyzer signals failure with None; nothing is persisted.
        raise HTTPException(status_code=502, detail="LLM analysis failed")

    finding.report = report
    await session.commit()

    return {
        "id": finding.id,
        **finding.data,
        "report": report,
    }

View File

@@ -4,71 +4,48 @@ import csv
import io
from fastapi import APIRouter, Depends, Query, Response
from sqlalchemy import func, select
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.constants import (
CSV_MEDIA_TYPE,
DEFAULT_OFFSET,
DEFAULT_PAGE_SIZE,
DEFAULT_SORT_BY_PACKAGES,
DEFAULT_SORT_DIR,
MAX_PAGE_SIZE,
)
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
from guarddog_nexus.queries import build_package_list_query
router = APIRouter(prefix="/api/v1/packages", tags=["packages"])
VALID_SORT_FIELDS = {
"name": Scan.package_name,
"version": Scan.package_version,
"last_scanned_at": Scan.started_at,
"total_findings": Scan.total_findings,
"flagged": Scan.flagged,
}
@router.get("")
async def list_packages(
limit: int = Query(50, le=200),
offset: int = Query(0, ge=0),
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
offset: int = Query(DEFAULT_OFFSET, ge=0),
ecosystem: str | None = Query(None),
flagged: bool | None = Query(None),
search: str | None = Query(None),
repository: str | None = Query(None),
sort_by: str = Query("last_scanned_at"),
sort_dir: str = Query("desc"),
sort_by: str = Query(DEFAULT_SORT_BY_PACKAGES),
sort_dir: str = Query(DEFAULT_SORT_DIR),
session: AsyncSession = Depends(get_session),
):
subq = select(
Scan.package_name,
Scan.package_version,
Scan.ecosystem,
Scan.repository,
func.max(Scan.started_at).label("last_scanned_at"),
func.max(Scan.flagged).label("is_flagged"),
func.sum(Scan.total_findings).label("total_findings"),
func.max(Scan.id).label("latest_scan_id"),
).group_by(Scan.package_name, Scan.package_version)
if ecosystem:
subq = subq.where(Scan.ecosystem == ecosystem)
if flagged is not None:
subq = subq.having(func.max(Scan.flagged) == flagged)
if repository:
subq = subq.where(Scan.repository == repository)
if search:
pattern = f"%{search}%"
subq = subq.where(
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
)
sort_field = VALID_SORT_FIELDS.get(sort_by, Scan.started_at)
sort_dir = "asc" if sort_dir.lower() == "asc" else "desc"
sort_col = func.max(sort_field)
subq = subq.order_by(
sort_col.desc() if sort_dir == "desc" else sort_col.asc()
rows_q, total_q = build_package_list_query(
flagged=flagged,
ecosystem=ecosystem,
repository=repository,
search=search,
sort_by=sort_by,
sort_dir=sort_dir,
limit=limit,
offset=offset,
)
total_q = select(func.count()).select_from(subq.subquery())
total = await session.scalar(total_q)
rows = (
await session.execute(subq.offset(offset).limit(limit))
).all()
rows = (await session.execute(rows_q)).all()
return {
"total": total,
@@ -76,14 +53,14 @@ async def list_packages(
"offset": offset,
"packages": [
{
"name": r.package_name,
"version": r.package_version,
"name": r.pkg_name,
"version": r.pkg_ver,
"ecosystem": r.ecosystem,
"repository": r.repository,
"last_scanned_at": r.last_scanned_at.isoformat() if r.last_scanned_at else None,
"last_scanned_at": r.last_scan.isoformat() if r.last_scan else None,
"flagged": bool(r.is_flagged),
"total_findings": r.total_findings,
"latest_scan_id": r.latest_scan_id,
"total_findings": r.findings_sum,
"latest_scan_id": r.sid,
}
for r in rows
],
@@ -96,44 +73,37 @@ async def export_packages_csv(
search: str | None = Query(None),
session: AsyncSession = Depends(get_session),
):
subq = select(
Scan.package_name,
Scan.package_version,
Scan.ecosystem,
Scan.repository,
func.max(Scan.started_at).label("last_scanned_at"),
func.max(Scan.flagged).label("is_flagged"),
func.sum(Scan.total_findings).label("total_findings"),
).group_by(Scan.package_name, Scan.package_version)
if flagged is not None:
subq = subq.having(func.max(Scan.flagged) == flagged)
if search:
pattern = f"%{search}%"
subq = subq.where(
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
)
subq = subq.order_by(func.max(Scan.started_at).desc())
rows = (await session.execute(subq)).all()
rows_q, _total_q = build_package_list_query(
flagged=flagged,
search=search,
sort_by=DEFAULT_SORT_BY_PACKAGES,
sort_dir=DEFAULT_SORT_DIR,
limit=MAX_PAGE_SIZE,
offset=0,
)
rows = (await session.execute(rows_q)).all()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow([
"name", "version", "ecosystem", "repository",
"last_scanned_at", "flagged", "total_findings"
])
writer.writerow(
[
"name", "version", "ecosystem", "repository",
"last_scanned_at", "flagged", "total_findings",
]
)
for r in rows:
writer.writerow([
r.package_name, r.package_version, r.ecosystem, r.repository,
r.last_scanned_at.isoformat() if r.last_scanned_at else "",
bool(r.is_flagged),
r.total_findings,
])
writer.writerow(
[
r.pkg_name, r.pkg_ver, r.ecosystem, r.repository,
r.last_scan.isoformat() if r.last_scan else "",
bool(r.is_flagged),
r.findings_sum,
]
)
return Response(
content=output.getvalue(),
media_type="text/csv",
media_type=CSV_MEDIA_TYPE,
headers={"Content-Disposition": "attachment; filename=packages_export.csv"},
)
@@ -162,10 +132,12 @@ async def get_package(
all_findings: list[dict] = []
for s in scans:
findings = (
(await session.execute(select(Finding).where(Finding.scan_id == s.id))).scalars().all()
(await session.execute(select(Finding).where(Finding.scan_id == s.id)))
.scalars()
.all()
)
for f in findings:
all_findings.append({"id": f.id, **f.data})
all_findings.append({"id": f.id, **f.data, "report": f.report})
return {
"name": scans[0].package_name,

View File

@@ -4,64 +4,50 @@ import csv
import io
from fastapi import APIRouter, Depends, Query, Response
from sqlalchemy import func, select, text
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from guarddog_nexus.constants import (
CSV_MEDIA_TYPE,
DEFAULT_OFFSET,
DEFAULT_PAGE_SIZE,
DEFAULT_SORT_BY_SCANS,
DEFAULT_SORT_DIR,
MAX_PAGE_SIZE,
)
from guarddog_nexus.database import get_session
from guarddog_nexus.models import Finding, Scan
from guarddog_nexus.models import Scan
from guarddog_nexus.queries import build_scan_list_query, get_dashboard_stats
router = APIRouter(prefix="/api/v1/scans", tags=["scans"])
VALID_SORT_FIELDS = {
"id": Scan.id,
"package_name": Scan.package_name,
"started_at": Scan.started_at,
"status": Scan.status,
"total_findings": Scan.total_findings,
"flagged": Scan.flagged,
}
@router.get("")
async def list_scans(
limit: int = Query(50, le=200),
offset: int = Query(0, ge=0),
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
offset: int = Query(DEFAULT_OFFSET, ge=0),
flagged: bool | None = Query(None),
search: str | None = Query(None),
status: str | None = Query(None),
repository: str | None = Query(None),
sort_by: str = Query("started_at"),
sort_dir: str = Query("desc"),
sort_by: str = Query(DEFAULT_SORT_BY_SCANS),
sort_dir: str = Query(DEFAULT_SORT_DIR),
session: AsyncSession = Depends(get_session),
):
q = select(Scan)
count_q = select(func.count(Scan.id))
if flagged is not None:
q = q.where(Scan.flagged == flagged)
count_q = count_q.where(Scan.flagged == flagged)
if status:
q = q.where(Scan.status == status)
count_q = count_q.where(Scan.status == status)
if repository:
q = q.where(Scan.repository == repository)
count_q = count_q.where(Scan.repository == repository)
if search:
pattern = f"%{search}%"
condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
q = q.where(condition)
count_q = count_q.where(condition)
sort_field = VALID_SORT_FIELDS.get(sort_by, Scan.started_at)
sort_dir = "asc" if sort_dir.lower() == "asc" else "desc"
q = q.order_by(sort_field.desc() if sort_dir == "desc" else sort_field.asc())
q = q.offset(offset).limit(limit)
q, count_q = build_scan_list_query(
flagged=flagged,
status=status,
repository=repository,
search=search,
sort_by=sort_by,
sort_dir=sort_dir,
limit=limit,
offset=offset,
)
scans = (await session.execute(q)).scalars().all()
total = await session.scalar(count_q)
scans = (await session.execute(q)).scalars().all()
return {
"total": total,
"limit": limit,
@@ -92,78 +78,57 @@ async def export_scans_csv(
status: str | None = Query(None),
session: AsyncSession = Depends(get_session),
):
q = select(Scan)
if flagged is not None:
q = q.where(Scan.flagged == flagged)
if status:
q = q.where(Scan.status == status)
if search:
pattern = f"%{search}%"
q = q.where(
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
)
q = q.order_by(Scan.started_at.desc())
q, _count_q = build_scan_list_query(
flagged=flagged,
status=status,
search=search,
sort_by=DEFAULT_SORT_BY_SCANS,
sort_dir=DEFAULT_SORT_DIR,
limit=MAX_PAGE_SIZE,
offset=0,
)
scans = (await session.execute(q)).scalars().all()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow([
"id", "package_name", "package_version", "ecosystem", "repository",
"status", "total_findings", "flagged", "started_at", "finished_at",
"error_message", "sha256"
])
writer.writerow(
[
"id", "package_name", "package_version", "ecosystem", "repository",
"status", "total_findings", "flagged", "started_at", "finished_at",
"error_message", "sha256",
]
)
for s in scans:
writer.writerow([
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
s.status, s.total_findings, s.flagged,
s.started_at.isoformat() if s.started_at else "",
s.finished_at.isoformat() if s.finished_at else "",
s.error_message or "",
s.sha256 or "",
])
writer.writerow(
[
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
s.status, s.total_findings, s.flagged,
s.started_at.isoformat() if s.started_at else "",
s.finished_at.isoformat() if s.finished_at else "",
s.error_message or "",
s.sha256 or "",
]
)
return Response(
content=output.getvalue(),
media_type="text/csv",
media_type=CSV_MEDIA_TYPE,
headers={"Content-Disposition": "attachment; filename=scans_export.csv"},
)
@router.get("/stats")
async def scan_stats(session: AsyncSession = Depends(get_session)):
    """Return aggregate scan statistics.

    The counters and the top-rules list are taken from
    ``get_dashboard_stats``; the timestamp of the most recent scan is
    queried directly.

    Args:
        session: Database session injected by FastAPI.

    Returns:
        A dict with ``total_scans``, ``flagged_scans``, ``recent_flagged``,
        ``total_findings``, ``top_rules`` and ``latest_scan_at`` (ISO-8601
        string, or ``None`` when there are no scans or the newest scan has
        no start time).
    """
    dashboard = await get_dashboard_stats(session)

    # "latest_scan_at" means the newest scan overall. Deriving it from
    # dashboard["latest_flagged"] would report only flagged scans and yield
    # None whenever nothing is flagged, so the newest scan is fetched here.
    latest_scan = await session.scalar(
        select(Scan).order_by(Scan.started_at.desc()).limit(1)
    )
    latest_scan_at = (
        latest_scan.started_at.isoformat()
        if latest_scan is not None and latest_scan.started_at
        else None
    )

    return {
        "total_scans": dashboard["total_scans"],
        "flagged_scans": dashboard["flagged_scans"],
        "recent_flagged": dashboard["recent_flagged"],
        "total_findings": dashboard["total_findings"],
        # NOTE(review): passed through unchanged; presumably a list of
        # {"rule": ..., "count": ...} items - confirm against queries.py.
        "top_rules": dashboard["top_rules"],
        "latest_scan_at": latest_scan_at,
    }
@@ -188,5 +153,5 @@ async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)):
"started_at": scan.started_at.isoformat() if scan.started_at else None,
"finished_at": scan.finished_at.isoformat() if scan.finished_at else None,
"error_message": scan.error_message,
"findings": [{"id": f.id, **f.data} for f in scan.findings],
"findings": [{"id": f.id, **f.data, "report": f.report} for f in scan.findings],
}