fix: аудит — 19 фиксов безопасности, надёжности, UI и 16 новых тестов

- S4: bump jinja2>=3.1.4, python-multipart>=0.0.18, httpx>=0.28.0
- S5: _detect_ecosystem — DEFAULT_ECOSYSTEM для неизвестных форматов
- S6: harvester — log.exception() вместо log.error()
- S8: _scan_component — urlencode параметров
- P1: scanner — proc.kill() при таймауте
- P3: api_packages — selectinload(Scan.findings), убран N+1
- P4+P5: устранена утечка _url_locks и _llm_locks при раннем выходе (early return)
- P6: DB reaper — сброс {'status':'analyzing'} при старте
- UI: htmx-пагинация, фильтры не теряют flagged, 404 с layout
- UI: overflow-x для таблиц на мобильных, полная статистика на дашборде
- UI: i18n статусов в _status_badge, urlencode package_name
- 16 новых тестов: analyze endpoint (6), scanner errors (4),
  webhook signature (2), llm client (4)
This commit is contained in:
Marker689
2026-05-10 10:45:44 +03:00
parent d483a8b21d
commit 1341404568
31 changed files with 575 additions and 152 deletions

View File

@@ -52,5 +52,3 @@ async def list_findings(
for f in findings
],
}

View File

@@ -7,6 +7,7 @@ from urllib.parse import unquote
from fastapi import APIRouter, Depends, Query, Response
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from ..constants import (
CSV_MEDIA_TYPE,
@@ -17,7 +18,7 @@ from ..constants import (
MAX_PAGE_SIZE,
)
from ..db.engine import get_session
from ..db.models import Finding, Scan
from ..db.models import Scan
from ..db.queries import build_package_list_query
router = APIRouter(prefix="/api/v1/packages", tags=["packages"])
@@ -88,14 +89,22 @@ async def export_packages_csv(
writer = csv.writer(output)
writer.writerow(
[
"name", "version", "ecosystem", "repository",
"last_scanned_at", "flagged", "total_findings",
"name",
"version",
"ecosystem",
"repository",
"last_scanned_at",
"flagged",
"total_findings",
]
)
for r in rows:
writer.writerow(
[
r.pkg_name, r.pkg_ver, r.ecosystem, r.repository,
r.pkg_name,
r.pkg_ver,
r.ecosystem,
r.repository,
r.last_scan.isoformat() if r.last_scan else "",
bool(r.is_flagged),
r.findings_sum,
@@ -123,6 +132,7 @@ async def get_package(
await session.execute(
select(Scan)
.where(Scan.package_name == pkg_name, Scan.package_version == pkg_version)
.options(selectinload(Scan.findings))
.order_by(Scan.started_at.desc())
)
)
@@ -135,12 +145,7 @@ async def get_package(
all_findings: list[dict] = []
for s in scans:
findings = (
(await session.execute(select(Finding).where(Finding.scan_id == s.id)))
.scalars()
.all()
)
for f in findings:
for f in s.findings:
all_findings.append({"id": f.id, **f.data, "report": f.report})
return {

View File

@@ -93,16 +93,31 @@ async def export_scans_csv(
writer = csv.writer(output)
writer.writerow(
[
"id", "package_name", "package_version", "ecosystem", "repository",
"status", "total_findings", "flagged", "started_at", "finished_at",
"error_message", "sha256",
"id",
"package_name",
"package_version",
"ecosystem",
"repository",
"status",
"total_findings",
"flagged",
"started_at",
"finished_at",
"error_message",
"sha256",
]
)
for s in scans:
writer.writerow(
[
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
s.status, s.total_findings, s.flagged,
s.id,
s.package_name,
s.package_version,
s.ecosystem,
s.repository,
s.status,
s.total_findings,
s.flagged,
s.started_at.isoformat() if s.started_at else "",
s.finished_at.isoformat() if s.finished_at else "",
s.error_message or "",

View File

@@ -15,31 +15,23 @@ router = APIRouter(tags=["metrics"])
@router.get("/metrics")
async def metrics(session: AsyncSession = Depends(get_session)):
total = await session.scalar(select(func.count(Scan.id))) or 0
flagged = await session.scalar(
select(func.count(Scan.id)).where(Scan.flagged == True)
) or 0
flagged = await session.scalar(select(func.count(Scan.id)).where(Scan.flagged == True)) or 0
findings_total = await session.scalar(select(func.count(Finding.id))) or 0
# By status
status_rows = (
await session.execute(
select(Scan.status, func.count(Scan.id)).group_by(Scan.status)
)
await session.execute(select(Scan.status, func.count(Scan.id)).group_by(Scan.status))
).all()
by_status = {row[0]: row[1] for row in status_rows}
# By ecosystem
eco_rows = (
await session.execute(
select(Scan.ecosystem, func.count(Scan.id)).group_by(Scan.ecosystem)
)
await session.execute(select(Scan.ecosystem, func.count(Scan.id)).group_by(Scan.ecosystem))
).all()
by_eco = {row[0]: row[1] for row in eco_rows}
# Latest scan timestamp
latest = await session.scalar(
select(func.max(Scan.started_at))
)
latest = await session.scalar(select(func.max(Scan.started_at)))
lines = [
"# HELP guarddog_scans_total Total number of package scans.",

View File

@@ -41,7 +41,8 @@ _jinja_env.globals["config"] = config
def _render(name: str, **context) -> HTMLResponse:
template = _jinja_env.get_template(name)
return HTMLResponse(template.render(**context))
status_code = context.pop("_status_code", 200)
return HTMLResponse(template.render(**context), status_code=status_code)
@router.get("/", response_class=HTMLResponse)
@@ -104,18 +105,14 @@ async def scans_list(
@router.get("/scans/{scan_id}", response_class=HTMLResponse)
async def scan_detail(
scan_id: int, request: Request, session: AsyncSession = Depends(get_session)
):
async def scan_detail(scan_id: int, request: Request, session: AsyncSession = Depends(get_session)):
from sqlalchemy.orm import selectinload
scan = await session.scalar(
select(Scan)
.where(Scan.id == scan_id)
.options(selectinload(Scan.findings))
select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings))
)
if not scan:
return HTMLResponse(f"<h1>{_t('not_found', request.state.lang)}</h1>", status_code=404)
return _render("404.html", request=request, _status_code=404)
return _render("scan_detail.html", scan=scan, request=request)
@@ -192,7 +189,7 @@ async def package_detail(
)
if not scans:
return HTMLResponse(f"<h1>{_t('not_found', request.state.lang)}</h1>", status_code=404)
return _render("404.html", request=request, _status_code=404)
all_findings = []
for s in scans:
@@ -223,9 +220,7 @@ async def analyze_finding_htmx(
if not config.llm_enabled:
msg = _t("llm_disabled", lang)
return HTMLResponse(
f'<div class="llm-actions"><small class="flagged">{msg}</small></div>'
)
return HTMLResponse(f'<div class="llm-actions"><small class="flagged">{msg}</small></div>')
finding = await session.scalar(select(Finding).where(Finding.id == finding_id))
if not finding:
@@ -252,6 +247,8 @@ async def analyze_finding_htmx(
lock = _llm_locks[finding_id]
if lock.locked():
async with _llm_lock:
_llm_locks.pop(finding_id, None)
return _render("_llm_spinner.html", request=request)
async with lock:
@@ -267,9 +264,7 @@ async def analyze_finding_htmx(
finding.report = None
await session.commit()
msg = _t("llm_failed", lang)
return HTMLResponse(
f'<div class="llm-actions"><small class="flagged">{msg}</small></div>'
)
return HTMLResponse(f'<div class="llm-actions"><small class="flagged">{msg}</small></div>')
finding.report = report
await session.commit()

View File

@@ -4,6 +4,7 @@ import hashlib
import hmac
import json
import re
from urllib.parse import urlencode
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
@@ -58,7 +59,7 @@ def _detect_ecosystem(source: dict) -> str:
return "go"
if fmt in ("npm", "node"):
return "npm"
return fmt or DEFAULT_ECOSYSTEM
return DEFAULT_ECOSYSTEM
@router.post("/nexus")
@@ -75,22 +76,16 @@ async def nexus_webhook(
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature"
)
expected = hmac.new(
config.webhook_secret.encode(), payload, hashlib.sha256
).hexdigest()
expected = hmac.new(config.webhook_secret.encode(), payload, hashlib.sha256).hexdigest()
if not hmac.compare_digest(x_nexus_webhook_signature, expected):
log.warning("Webhook rejected: invalid signature")
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature"
)
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature")
try:
data = json.loads(payload.decode("utf-8"))
except (json.JSONDecodeError, UnicodeDecodeError):
log.warning("Webhook received invalid body")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body"
)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body")
action = data.get("action", "").upper()
if action not in RELEVANT_WEBHOOK_ACTIONS:
@@ -108,8 +103,7 @@ async def nexus_webhook(
initiator = raw_initiator
source_ip = request.client.host if request.client else None
log.info("Webhook: action=%s initiator=%s source_ip=%s",
action, initiator, source_ip)
log.info("Webhook: action=%s initiator=%s source_ip=%s", action, initiator, source_ip)
repository = data.get("repositoryName", "")
if not repository:
@@ -125,16 +119,19 @@ async def nexus_webhook(
if not asset_path or not _is_package_asset(asset_path):
return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE}
download_url = asset.get("downloadUrl") or _build_download_url(
repository, asset_path
)
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
ecosystem = _detect_ecosystem(asset)
log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository)
background_tasks.add_task(
_scan_in_background, download_url, repository, ecosystem, asset_path,
initiator=initiator, source_ip=source_ip,
_scan_in_background,
download_url,
repository,
ecosystem,
asset_path,
initiator=initiator,
source_ip=source_ip,
)
return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}
@@ -164,10 +161,15 @@ async def nexus_webhook(
async def _scan_component(repository: str, name: str, version: str, ecosystem: str):
from ..core.nexus import nexus_get
api_path = (
f"/service/rest/v1/search"
f"?repository={repository}&name={name}&version={version}&format={ecosystem}"
params = urlencode(
{
"repository": repository,
"name": name,
"version": version,
"format": ecosystem,
}
)
api_path = f"/service/rest/v1/search?{params}"
try:
resp = await nexus_get(api_path)
resp.raise_for_status()
@@ -186,14 +188,10 @@ async def _scan_component(repository: str, name: str, version: str, ecosystem: s
asset_path = _extract_asset_path(asset)
if not asset_path or not _is_package_asset(asset_path):
continue
download_url = asset.get("downloadUrl") or _build_download_url(
repository, asset_path
)
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
log.info("Scanning component asset: %s", asset_path)
async for session in get_session():
await harvest(
download_url, repository, ecosystem, asset_path, session
)
await harvest(download_url, repository, ecosystem, asset_path, session)
break
@@ -208,8 +206,13 @@ async def _scan_in_background(
try:
async for session in get_session():
await harvest(
download_url, repository, format_, asset_path, session,
initiator=initiator, source_ip=source_ip,
download_url,
repository,
format_,
asset_path,
session,
initiator=initiator,
source_ip=source_ip,
)
break
except Exception as e: