refactor: вынос хардкода + LLM-анализ finding'ов
## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
(104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
build_package_list_query, get_dashboard_stats)
Убрано дублирование между api/*.py и web/routes.py (~90%)
- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants
## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
(htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious
## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
@@ -4,6 +4,14 @@ from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import (
|
||||
DEFAULT_OFFSET,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
JSON_PATH_RULE,
|
||||
JSON_PATH_SEVERITY,
|
||||
MAX_PAGE_SIZE,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.models import Finding
|
||||
|
||||
@@ -12,8 +20,8 @@ router = APIRouter(prefix="/api/v1/findings", tags=["findings"])
|
||||
|
||||
@router.get("")
|
||||
async def list_findings(
|
||||
limit: int = Query(50, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
|
||||
offset: int = Query(DEFAULT_OFFSET, ge=0),
|
||||
rule: str | None = Query(None),
|
||||
severity: str | None = Query(None),
|
||||
scan_id: int | None = Query(None),
|
||||
@@ -21,9 +29,9 @@ async def list_findings(
|
||||
):
|
||||
q = select(Finding)
|
||||
if rule:
|
||||
q = q.where(func.json_extract(Finding.data, "$.rule") == rule)
|
||||
q = q.where(func.json_extract(Finding.data, JSON_PATH_RULE) == rule)
|
||||
if severity:
|
||||
q = q.where(func.json_extract(Finding.data, "$.severity") == severity)
|
||||
q = q.where(func.json_extract(Finding.data, JSON_PATH_SEVERITY) == severity)
|
||||
if scan_id:
|
||||
q = q.where(Finding.scan_id == scan_id)
|
||||
|
||||
@@ -39,8 +47,40 @@ async def list_findings(
|
||||
"id": f.id,
|
||||
"scan_id": f.scan_id,
|
||||
**f.data,
|
||||
"report": f.report,
|
||||
"created_at": f.created_at.isoformat() if f.created_at else None,
|
||||
}
|
||||
for f in findings
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@router.post("/{finding_id}/analyze")
async def analyze_finding_endpoint(
    finding_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Manually trigger LLM analysis for a single finding.

    Loads the finding by primary key, passes its ``data`` payload to
    ``analyze_finding`` and stores the returned report on the row.

    Args:
        finding_id: Primary key of the finding to analyze.
        session: Database session injected by FastAPI.

    Returns:
        A dict with the finding ``id``, every key of ``finding.data`` and
        the freshly generated ``report``.

    Raises:
        HTTPException: 503 when ``config.llm_enabled`` is falsy, 404 when no
            finding has this id, 502 when ``analyze_finding`` returns
            ``None``. The JSON body stays ``{"detail": "<message>"}`` with
            the same messages as before; only the status code now signals
            the failure instead of a misleading 200.
    """
    # Local import: HTTPException is not among the names this module is
    # shown importing from fastapi at the top of the file.
    from fastapi import HTTPException

    if not config.llm_enabled:
        raise HTTPException(status_code=503, detail="LLM analysis is disabled")

    finding = await session.scalar(
        select(Finding).where(Finding.id == finding_id)
    )
    if finding is None:
        raise HTTPException(status_code=404, detail="Not found")

    # Kept as a call-time import, exactly as the original code does.
    from guarddog_nexus.llm import analyze_finding

    report = await analyze_finding(finding.data)
    if report is None:
        raise HTTPException(status_code=502, detail="LLM analysis failed")

    # Build the response before committing: if the session expires
    # instances on commit, reading finding.data afterwards would need an
    # implicit refresh, which an AsyncSession cannot do from attribute
    # access. NOTE(review): session factory settings are not visible here.
    payload = {
        "id": finding.id,
        **finding.data,
        "report": report,
    }

    finding.report = report
    await session.commit()

    return payload
|
||||
|
||||
@@ -4,71 +4,48 @@ import csv
|
||||
import io
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, Response
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.constants import (
|
||||
CSV_MEDIA_TYPE,
|
||||
DEFAULT_OFFSET,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
DEFAULT_SORT_BY_PACKAGES,
|
||||
DEFAULT_SORT_DIR,
|
||||
MAX_PAGE_SIZE,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.models import Finding, Scan
|
||||
from guarddog_nexus.queries import build_package_list_query
|
||||
|
||||
router = APIRouter(prefix="/api/v1/packages", tags=["packages"])
|
||||
|
||||
VALID_SORT_FIELDS = {
|
||||
"name": Scan.package_name,
|
||||
"version": Scan.package_version,
|
||||
"last_scanned_at": Scan.started_at,
|
||||
"total_findings": Scan.total_findings,
|
||||
"flagged": Scan.flagged,
|
||||
}
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_packages(
|
||||
limit: int = Query(50, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
|
||||
offset: int = Query(DEFAULT_OFFSET, ge=0),
|
||||
ecosystem: str | None = Query(None),
|
||||
flagged: bool | None = Query(None),
|
||||
search: str | None = Query(None),
|
||||
repository: str | None = Query(None),
|
||||
sort_by: str = Query("last_scanned_at"),
|
||||
sort_dir: str = Query("desc"),
|
||||
sort_by: str = Query(DEFAULT_SORT_BY_PACKAGES),
|
||||
sort_dir: str = Query(DEFAULT_SORT_DIR),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
subq = select(
|
||||
Scan.package_name,
|
||||
Scan.package_version,
|
||||
Scan.ecosystem,
|
||||
Scan.repository,
|
||||
func.max(Scan.started_at).label("last_scanned_at"),
|
||||
func.max(Scan.flagged).label("is_flagged"),
|
||||
func.sum(Scan.total_findings).label("total_findings"),
|
||||
func.max(Scan.id).label("latest_scan_id"),
|
||||
).group_by(Scan.package_name, Scan.package_version)
|
||||
|
||||
if ecosystem:
|
||||
subq = subq.where(Scan.ecosystem == ecosystem)
|
||||
if flagged is not None:
|
||||
subq = subq.having(func.max(Scan.flagged) == flagged)
|
||||
if repository:
|
||||
subq = subq.where(Scan.repository == repository)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
subq = subq.where(
|
||||
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
)
|
||||
|
||||
sort_field = VALID_SORT_FIELDS.get(sort_by, Scan.started_at)
|
||||
sort_dir = "asc" if sort_dir.lower() == "asc" else "desc"
|
||||
sort_col = func.max(sort_field)
|
||||
subq = subq.order_by(
|
||||
sort_col.desc() if sort_dir == "desc" else sort_col.asc()
|
||||
rows_q, total_q = build_package_list_query(
|
||||
flagged=flagged,
|
||||
ecosystem=ecosystem,
|
||||
repository=repository,
|
||||
search=search,
|
||||
sort_by=sort_by,
|
||||
sort_dir=sort_dir,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
|
||||
total_q = select(func.count()).select_from(subq.subquery())
|
||||
total = await session.scalar(total_q)
|
||||
|
||||
rows = (
|
||||
await session.execute(subq.offset(offset).limit(limit))
|
||||
).all()
|
||||
rows = (await session.execute(rows_q)).all()
|
||||
|
||||
return {
|
||||
"total": total,
|
||||
@@ -76,14 +53,14 @@ async def list_packages(
|
||||
"offset": offset,
|
||||
"packages": [
|
||||
{
|
||||
"name": r.package_name,
|
||||
"version": r.package_version,
|
||||
"name": r.pkg_name,
|
||||
"version": r.pkg_ver,
|
||||
"ecosystem": r.ecosystem,
|
||||
"repository": r.repository,
|
||||
"last_scanned_at": r.last_scanned_at.isoformat() if r.last_scanned_at else None,
|
||||
"last_scanned_at": r.last_scan.isoformat() if r.last_scan else None,
|
||||
"flagged": bool(r.is_flagged),
|
||||
"total_findings": r.total_findings,
|
||||
"latest_scan_id": r.latest_scan_id,
|
||||
"total_findings": r.findings_sum,
|
||||
"latest_scan_id": r.sid,
|
||||
}
|
||||
for r in rows
|
||||
],
|
||||
@@ -96,44 +73,37 @@ async def export_packages_csv(
|
||||
search: str | None = Query(None),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
subq = select(
|
||||
Scan.package_name,
|
||||
Scan.package_version,
|
||||
Scan.ecosystem,
|
||||
Scan.repository,
|
||||
func.max(Scan.started_at).label("last_scanned_at"),
|
||||
func.max(Scan.flagged).label("is_flagged"),
|
||||
func.sum(Scan.total_findings).label("total_findings"),
|
||||
).group_by(Scan.package_name, Scan.package_version)
|
||||
|
||||
if flagged is not None:
|
||||
subq = subq.having(func.max(Scan.flagged) == flagged)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
subq = subq.where(
|
||||
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
)
|
||||
|
||||
subq = subq.order_by(func.max(Scan.started_at).desc())
|
||||
rows = (await session.execute(subq)).all()
|
||||
rows_q, _total_q = build_package_list_query(
|
||||
flagged=flagged,
|
||||
search=search,
|
||||
sort_by=DEFAULT_SORT_BY_PACKAGES,
|
||||
sort_dir=DEFAULT_SORT_DIR,
|
||||
limit=MAX_PAGE_SIZE,
|
||||
offset=0,
|
||||
)
|
||||
rows = (await session.execute(rows_q)).all()
|
||||
|
||||
output = io.StringIO()
|
||||
writer = csv.writer(output)
|
||||
writer.writerow([
|
||||
"name", "version", "ecosystem", "repository",
|
||||
"last_scanned_at", "flagged", "total_findings"
|
||||
])
|
||||
writer.writerow(
|
||||
[
|
||||
"name", "version", "ecosystem", "repository",
|
||||
"last_scanned_at", "flagged", "total_findings",
|
||||
]
|
||||
)
|
||||
for r in rows:
|
||||
writer.writerow([
|
||||
r.package_name, r.package_version, r.ecosystem, r.repository,
|
||||
r.last_scanned_at.isoformat() if r.last_scanned_at else "",
|
||||
bool(r.is_flagged),
|
||||
r.total_findings,
|
||||
])
|
||||
writer.writerow(
|
||||
[
|
||||
r.pkg_name, r.pkg_ver, r.ecosystem, r.repository,
|
||||
r.last_scan.isoformat() if r.last_scan else "",
|
||||
bool(r.is_flagged),
|
||||
r.findings_sum,
|
||||
]
|
||||
)
|
||||
|
||||
return Response(
|
||||
content=output.getvalue(),
|
||||
media_type="text/csv",
|
||||
media_type=CSV_MEDIA_TYPE,
|
||||
headers={"Content-Disposition": "attachment; filename=packages_export.csv"},
|
||||
)
|
||||
|
||||
@@ -162,10 +132,12 @@ async def get_package(
|
||||
all_findings: list[dict] = []
|
||||
for s in scans:
|
||||
findings = (
|
||||
(await session.execute(select(Finding).where(Finding.scan_id == s.id))).scalars().all()
|
||||
(await session.execute(select(Finding).where(Finding.scan_id == s.id)))
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
for f in findings:
|
||||
all_findings.append({"id": f.id, **f.data})
|
||||
all_findings.append({"id": f.id, **f.data, "report": f.report})
|
||||
|
||||
return {
|
||||
"name": scans[0].package_name,
|
||||
|
||||
@@ -4,64 +4,50 @@ import csv
|
||||
import io
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, Response
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from guarddog_nexus.constants import (
|
||||
CSV_MEDIA_TYPE,
|
||||
DEFAULT_OFFSET,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
DEFAULT_SORT_BY_SCANS,
|
||||
DEFAULT_SORT_DIR,
|
||||
MAX_PAGE_SIZE,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.models import Finding, Scan
|
||||
from guarddog_nexus.models import Scan
|
||||
from guarddog_nexus.queries import build_scan_list_query, get_dashboard_stats
|
||||
|
||||
router = APIRouter(prefix="/api/v1/scans", tags=["scans"])
|
||||
|
||||
VALID_SORT_FIELDS = {
|
||||
"id": Scan.id,
|
||||
"package_name": Scan.package_name,
|
||||
"started_at": Scan.started_at,
|
||||
"status": Scan.status,
|
||||
"total_findings": Scan.total_findings,
|
||||
"flagged": Scan.flagged,
|
||||
}
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_scans(
|
||||
limit: int = Query(50, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
|
||||
offset: int = Query(DEFAULT_OFFSET, ge=0),
|
||||
flagged: bool | None = Query(None),
|
||||
search: str | None = Query(None),
|
||||
status: str | None = Query(None),
|
||||
repository: str | None = Query(None),
|
||||
sort_by: str = Query("started_at"),
|
||||
sort_dir: str = Query("desc"),
|
||||
sort_by: str = Query(DEFAULT_SORT_BY_SCANS),
|
||||
sort_dir: str = Query(DEFAULT_SORT_DIR),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
q = select(Scan)
|
||||
count_q = select(func.count(Scan.id))
|
||||
|
||||
if flagged is not None:
|
||||
q = q.where(Scan.flagged == flagged)
|
||||
count_q = count_q.where(Scan.flagged == flagged)
|
||||
if status:
|
||||
q = q.where(Scan.status == status)
|
||||
count_q = count_q.where(Scan.status == status)
|
||||
if repository:
|
||||
q = q.where(Scan.repository == repository)
|
||||
count_q = count_q.where(Scan.repository == repository)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
q = q.where(condition)
|
||||
count_q = count_q.where(condition)
|
||||
|
||||
sort_field = VALID_SORT_FIELDS.get(sort_by, Scan.started_at)
|
||||
sort_dir = "asc" if sort_dir.lower() == "asc" else "desc"
|
||||
q = q.order_by(sort_field.desc() if sort_dir == "desc" else sort_field.asc())
|
||||
|
||||
q = q.offset(offset).limit(limit)
|
||||
|
||||
q, count_q = build_scan_list_query(
|
||||
flagged=flagged,
|
||||
status=status,
|
||||
repository=repository,
|
||||
search=search,
|
||||
sort_by=sort_by,
|
||||
sort_dir=sort_dir,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
scans = (await session.execute(q)).scalars().all()
|
||||
total = await session.scalar(count_q)
|
||||
|
||||
scans = (await session.execute(q)).scalars().all()
|
||||
return {
|
||||
"total": total,
|
||||
"limit": limit,
|
||||
@@ -92,78 +78,57 @@ async def export_scans_csv(
|
||||
status: str | None = Query(None),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
q = select(Scan)
|
||||
|
||||
if flagged is not None:
|
||||
q = q.where(Scan.flagged == flagged)
|
||||
if status:
|
||||
q = q.where(Scan.status == status)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
q = q.where(
|
||||
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
)
|
||||
|
||||
q = q.order_by(Scan.started_at.desc())
|
||||
q, _count_q = build_scan_list_query(
|
||||
flagged=flagged,
|
||||
status=status,
|
||||
search=search,
|
||||
sort_by=DEFAULT_SORT_BY_SCANS,
|
||||
sort_dir=DEFAULT_SORT_DIR,
|
||||
limit=MAX_PAGE_SIZE,
|
||||
offset=0,
|
||||
)
|
||||
scans = (await session.execute(q)).scalars().all()
|
||||
|
||||
output = io.StringIO()
|
||||
writer = csv.writer(output)
|
||||
writer.writerow([
|
||||
"id", "package_name", "package_version", "ecosystem", "repository",
|
||||
"status", "total_findings", "flagged", "started_at", "finished_at",
|
||||
"error_message", "sha256"
|
||||
])
|
||||
writer.writerow(
|
||||
[
|
||||
"id", "package_name", "package_version", "ecosystem", "repository",
|
||||
"status", "total_findings", "flagged", "started_at", "finished_at",
|
||||
"error_message", "sha256",
|
||||
]
|
||||
)
|
||||
for s in scans:
|
||||
writer.writerow([
|
||||
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
|
||||
s.status, s.total_findings, s.flagged,
|
||||
s.started_at.isoformat() if s.started_at else "",
|
||||
s.finished_at.isoformat() if s.finished_at else "",
|
||||
s.error_message or "",
|
||||
s.sha256 or "",
|
||||
])
|
||||
writer.writerow(
|
||||
[
|
||||
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
|
||||
s.status, s.total_findings, s.flagged,
|
||||
s.started_at.isoformat() if s.started_at else "",
|
||||
s.finished_at.isoformat() if s.finished_at else "",
|
||||
s.error_message or "",
|
||||
s.sha256 or "",
|
||||
]
|
||||
)
|
||||
|
||||
return Response(
|
||||
content=output.getvalue(),
|
||||
media_type="text/csv",
|
||||
media_type=CSV_MEDIA_TYPE,
|
||||
headers={"Content-Disposition": "attachment; filename=scans_export.csv"},
|
||||
)
|
||||
|
||||
|
||||
@router.get("/stats")
async def scan_stats(session: AsyncSession = Depends(get_session)):
    """Return aggregate scan statistics for the dashboard.

    The counters and ``top_rules`` are taken from ``get_dashboard_stats``.
    ``latest_scan_at`` is the ``started_at`` of the most recent scan of any
    kind, matching what this endpoint reported before the query helpers
    were extracted.

    Args:
        session: Database session injected by FastAPI.

    Returns:
        A dict with ``total_scans``, ``flagged_scans``, ``recent_flagged``,
        ``total_findings``, ``top_rules`` and ``latest_scan_at`` (ISO 8601
        string, or ``None`` when there is no scan or it has no start time).
    """
    dashboard = await get_dashboard_stats(session)

    # Query the newest scan directly instead of using
    # dashboard["latest_flagged"][0]: that entry presumably covers flagged
    # scans only (TODO confirm against get_dashboard_stats), which would
    # make this key wrong, or None, whenever the newest scan is clean.
    latest_scan = await session.scalar(
        select(Scan).order_by(Scan.started_at.desc()).limit(1)
    )
    latest_scan_at = (
        latest_scan.started_at.isoformat()
        if latest_scan is not None and latest_scan.started_at
        else None
    )

    return {
        "total_scans": dashboard["total_scans"],
        "flagged_scans": dashboard["flagged_scans"],
        "recent_flagged": dashboard["recent_flagged"],
        "total_findings": dashboard["total_findings"],
        "top_rules": dashboard["top_rules"],
        "latest_scan_at": latest_scan_at,
    }
|
||||
|
||||
|
||||
@@ -188,5 +153,5 @@ async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)):
|
||||
"started_at": scan.started_at.isoformat() if scan.started_at else None,
|
||||
"finished_at": scan.finished_at.isoformat() if scan.finished_at else None,
|
||||
"error_message": scan.error_message,
|
||||
"findings": [{"id": f.id, **f.data} for f in scan.findings],
|
||||
"findings": [{"id": f.id, **f.data, "report": f.report} for f in scan.findings],
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user